Unverified Commit 826f0457 authored by Lysandre Debut, committed by GitHub

Model templates encoder only (#8509)



* Model templates

* TensorFlow

* Remove pooler

* CI

* Tokenizer + Refactoring

* Encoder-Decoder

* Let's go testing

* Encoder-Decoder in TF

* Let's go testing in TF

* Documentation

* README

* Fixes

* Better names

* Style

* Update docs

* Choose to skip either TF or PT

* Code quality fixes

* Add to testing suite

* Update file path

* Cookiecutter path

* Update `transformers` path

* Handle rebasing

* Remove seq2seq from model templates

* Remove s2s config

* Apply Sylvain and Patrick comments

* Apply suggestions from code review
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Last fixes from code review
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
parent 42e2d02e
@@ -4,10 +4,12 @@ on:
   push:
     branches:
       - master
+      - model-templates
     paths:
       - "src/**"
       - "tests/**"
       - ".github/**"
+      - "templates/**"
   # pull_request:
   repository_dispatch:

@@ -55,6 +57,14 @@ jobs:
         python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
         python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"

+    - name: Create model files
+      run: |
+        source .env/bin/activate
+        transformers-cli add-new-model --testing --testing_file=templates/cookiecutter/tests/encoder-bert-tokenizer.json --path=templates/cookiecutter
+        transformers-cli add-new-model --testing --testing_file=templates/cookiecutter/tests/pt-encoder-bert-tokenizer.json --path=templates/cookiecutter
+        transformers-cli add-new-model --testing --testing_file=templates/cookiecutter/tests/standalone.json --path=templates/cookiecutter
+        transformers-cli add-new-model --testing --testing_file=templates/cookiecutter/tests/tf-encoder-bert-tokenizer.json --path=templates/cookiecutter
+
     - name: Run all non-slow tests on GPU
       env:
         OMP_NUM_THREADS: 1

@@ -116,6 +126,14 @@ jobs:
         TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
         TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"

+    - name: Create model files
+      run: |
+        source .env/bin/activate
+        transformers-cli add-new-model --testing --testing_file=templates/cookiecutter/tests/encoder-bert-tokenizer.json --path=templates/cookiecutter
+        transformers-cli add-new-model --testing --testing_file=templates/cookiecutter/tests/pt-encoder-bert-tokenizer.json --path=templates/cookiecutter
+        transformers-cli add-new-model --testing --testing_file=templates/cookiecutter/tests/standalone.json --path=templates/cookiecutter
+        transformers-cli add-new-model --testing --testing_file=templates/cookiecutter/tests/tf-encoder-bert-tokenizer.json --path=templates/cookiecutter
+
     - name: Run all non-slow tests on GPU
       env:
         OMP_NUM_THREADS: 1
...
@@ -98,12 +98,13 @@ else:

 extras["tokenizers"] = ["tokenizers==0.9.2"]
 extras["onnxruntime"] = ["onnxruntime>=1.4.0", "onnxruntime-tools>=1.4.2"]
+extras["modelcreation"] = ["cookiecutter==1.7.2"]

 extras["serving"] = ["pydantic", "uvicorn", "fastapi", "starlette"]
 extras["sentencepiece"] = ["sentencepiece==0.1.91"]
 extras["retrieval"] = ["faiss-cpu", "datasets"]
-extras["testing"] = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil"] + extras["retrieval"]
+extras["testing"] = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil"] + extras["retrieval"] + extras["modelcreation"]

 # sphinx-rtd-theme==0.5.0 introduced big changes in the style.
 extras["docs"] = ["recommonmark", "sphinx==3.2.1", "sphinx-markdown-tables", "sphinx-rtd-theme==0.4.3", "sphinx-copybutton"]
 extras["quality"] = ["black >= 20.8b1", "isort >= 5.5.4", "flake8 >= 3.8.3"]

@@ -111,7 +112,7 @@ extras["quality"] = ["black >= 20.8b1", "isort >= 5.5.4", "flake8 >= 3.8.3"]

 extras["all"] = extras["tf"] + extras["torch"] + extras["flax"] + extras["sentencepiece"] + extras["tokenizers"]
-extras["dev"] = extras["all"] + extras["testing"] + extras["quality"] + extras["ja"] + extras["docs"] + extras["sklearn"]
+extras["dev"] = extras["all"] + extras["testing"] + extras["quality"] + extras["ja"] + extras["docs"] + extras["sklearn"] + extras["modelcreation"]

 setup(
...
import json
import os
import shutil
from argparse import ArgumentParser, Namespace
from pathlib import Path
from typing import List
from cookiecutter.main import cookiecutter
from transformers.commands import BaseTransformersCLICommand
from ..utils import logging
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
def add_new_model_command_factory(args: Namespace):
return AddNewModelCommand(args.testing, args.testing_file, path=args.path)
class AddNewModelCommand(BaseTransformersCLICommand):
@staticmethod
def register_subcommand(parser: ArgumentParser):
add_new_model_parser = parser.add_parser("add-new-model")
add_new_model_parser.add_argument("--testing", action="store_true", help="If in testing mode.")
add_new_model_parser.add_argument("--testing_file", type=str, help="Configuration file on which to run.")
add_new_model_parser.add_argument(
"--path", type=str, help="Path to cookiecutter. Should only be used for testing purposes."
)
add_new_model_parser.set_defaults(func=add_new_model_command_factory)
def __init__(self, testing: bool, testing_file: str, path=None, *args):
self._testing = testing
self._testing_file = testing_file
self._path = path
def run(self):
# Ensure that there is no other `cookiecutter-template-xxx` directory in the current working directory
directories = [directory for directory in os.listdir() if "cookiecutter-template-" == directory[:22]]
if len(directories) > 0:
            raise ValueError(
                "Several directories starting with `cookiecutter-template-` were found in the current working "
                "directory. Please clean it up by removing all folders starting with `cookiecutter-template-` "
                "or change your working directory."
)
path_to_transformer_root = (
Path(__file__).parent.parent.parent.parent if self._path is None else Path(self._path).parent.parent
)
path_to_cookiecutter = path_to_transformer_root / "templates" / "cookiecutter"
# Execute cookiecutter
if not self._testing:
cookiecutter(str(path_to_cookiecutter))
else:
with open(self._testing_file, "r") as configuration_file:
testing_configuration = json.load(configuration_file)
cookiecutter(
str(path_to_cookiecutter if self._path is None else self._path),
no_input=True,
extra_context=testing_configuration,
)
directory = [directory for directory in os.listdir() if "cookiecutter-template-" in directory[:22]][0]
# Retrieve configuration
with open(directory + "/configuration.json", "r") as configuration_file:
configuration = json.load(configuration_file)
lowercase_model_name = configuration["lowercase_modelname"]
pytorch_or_tensorflow = configuration["generate_tensorflow_and_pytorch"]
os.remove(f"{directory}/configuration.json")
output_pytorch = "PyTorch" in pytorch_or_tensorflow
output_tensorflow = "TensorFlow" in pytorch_or_tensorflow
shutil.move(
f"{directory}/configuration_{lowercase_model_name}.py",
f"{path_to_transformer_root}/src/transformers/configuration_{lowercase_model_name}.py",
)
def remove_copy_lines(path):
with open(path, "r") as f:
lines = f.readlines()
with open(path, "w") as f:
for line in lines:
if "# Copied from transformers." not in line:
f.write(line)
if output_pytorch:
if not self._testing:
remove_copy_lines(f"{directory}/modeling_{lowercase_model_name}.py")
shutil.move(
f"{directory}/modeling_{lowercase_model_name}.py",
f"{path_to_transformer_root}/src/transformers/modeling_{lowercase_model_name}.py",
)
shutil.move(
f"{directory}/test_modeling_{lowercase_model_name}.py",
f"{path_to_transformer_root}/tests/test_modeling_{lowercase_model_name}.py",
)
else:
os.remove(f"{directory}/modeling_{lowercase_model_name}.py")
os.remove(f"{directory}/test_modeling_{lowercase_model_name}.py")
if output_tensorflow:
if not self._testing:
remove_copy_lines(f"{directory}/modeling_tf_{lowercase_model_name}.py")
shutil.move(
f"{directory}/modeling_tf_{lowercase_model_name}.py",
f"{path_to_transformer_root}/src/transformers/modeling_tf_{lowercase_model_name}.py",
)
shutil.move(
f"{directory}/test_modeling_tf_{lowercase_model_name}.py",
f"{path_to_transformer_root}/tests/test_modeling_tf_{lowercase_model_name}.py",
)
else:
os.remove(f"{directory}/modeling_tf_{lowercase_model_name}.py")
os.remove(f"{directory}/test_modeling_tf_{lowercase_model_name}.py")
shutil.move(
f"{directory}/{lowercase_model_name}.rst",
f"{path_to_transformer_root}/docs/source/model_doc/{lowercase_model_name}.rst",
)
shutil.move(
f"{directory}/tokenization_{lowercase_model_name}.py",
f"{path_to_transformer_root}/src/transformers/tokenization_{lowercase_model_name}.py",
)
from os import fdopen, remove
from shutil import copymode, move
from tempfile import mkstemp
def replace(original_file: str, line_to_copy_below: str, lines_to_copy: List[str]):
# Create temp file
fh, abs_path = mkstemp()
line_found = False
with fdopen(fh, "w") as new_file:
with open(original_file) as old_file:
for line in old_file:
new_file.write(line)
if line_to_copy_below in line:
line_found = True
for line_to_copy in lines_to_copy:
new_file.write(line_to_copy)
if not line_found:
raise ValueError(f"Line {line_to_copy_below} was not found in file.")
# Copy the file permissions from the old file to the new file
copymode(original_file, abs_path)
# Remove original file
remove(original_file)
# Move new file
move(abs_path, original_file)
def skip_units(line):
return ("generating PyTorch" in line and not output_pytorch) or (
"generating TensorFlow" in line and not output_tensorflow
)
def replace_in_files(path_to_datafile):
with open(path_to_datafile) as datafile:
lines_to_copy = []
skip_file = False
skip_snippet = False
for line in datafile:
if "# To replace in: " in line and "##" not in line:
file_to_replace_in = line.split('"')[1]
skip_file = skip_units(line)
elif "# Below: " in line and "##" not in line:
line_to_copy_below = line.split('"')[1]
skip_snippet = skip_units(line)
elif "# End." in line and "##" not in line:
if not skip_file and not skip_snippet:
replace(file_to_replace_in, line_to_copy_below, lines_to_copy)
lines_to_copy = []
elif "# Replace with" in line and "##" not in line:
lines_to_copy = []
elif "##" not in line:
lines_to_copy.append(line)
remove(path_to_datafile)
replace_in_files(f"{directory}/to_replace_{lowercase_model_name}.py")
os.rmdir(directory)
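For reference, the `to_replace_<model_name>.py` data file consumed by `replace_in_files` above is driven by `# To replace in:` / `# Below:` / `# Replace with` / `# End.` directives, and any line containing `##` is ignored by the parser. A minimal, hypothetical directive block might look like this (the target file, anchor line, and import are placeholders, not taken from the actual templates):

```python
## Hypothetical illustration of the directive format parsed by replace_in_files.
# To replace in: "src/transformers/__init__.py"
# Below: "from .configuration_utils import PretrainedConfig"
# Replace with:
from .configuration_brandnewmodel import BrandNewModelConfig
# End.
```

When `# End.` is reached, the collected lines are copied into the target file directly below the anchor line, with file permissions preserved.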
 #!/usr/bin/env python
 from argparse import ArgumentParser

+from transformers.commands.add_new_model import AddNewModelCommand
 from transformers.commands.convert import ConvertCommand
 from transformers.commands.download import DownloadCommand
 from transformers.commands.env import EnvironmentCommand

@@ -20,6 +21,7 @@ def main():
     RunCommand.register_subcommand(commands_parser)
     ServeCommand.register_subcommand(commands_parser)
     UserCommands.register_subcommand(commands_parser)
+    AddNewModelCommand.register_subcommand(commands_parser)

     # Let's go
     args = parser.parse_args()
...
@@ -59,6 +59,7 @@ from .configuration_xlnet import XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP, XLNetConfig

 ALL_PRETRAINED_CONFIG_ARCHIVE_MAP = dict(
     (key, value)
     for pretrained_map in [
+        # Add archive maps here
         BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
         BART_PRETRAINED_CONFIG_ARCHIVE_MAP,
         BLENDERBOT_PRETRAINED_CONFIG_ARCHIVE_MAP,

@@ -95,6 +96,7 @@ ALL_PRETRAINED_CONFIG_ARCHIVE_MAP = dict(

 CONFIG_MAPPING = OrderedDict(
     [
+        # Add configs here
         ("retribert", RetriBertConfig),
         ("t5", T5Config),
         ("mobilebert", MobileBertConfig),

@@ -136,6 +138,7 @@ CONFIG_MAPPING = OrderedDict(

 MODEL_NAMES_MAPPING = OrderedDict(
     [
+        # Add full (and cased) model names here
         ("retribert", "RetriBERT"),
         ("t5", "T5"),
         ("mobilebert", "MobileBERT"),
...
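These `# Add ... here` markers are the anchor lines the directive files target, and the mappings themselves are what the auto classes consult at runtime. As a quick sketch of the lookup, using BERT (which is already registered): `AutoConfig` reads the `model_type` field from a checkpoint's `config.json` and resolves it through `CONFIG_MAPPING`:

```python
from transformers import AutoConfig

# "bert-base-uncased" declares model_type "bert", which CONFIG_MAPPING
# resolves to the BertConfig class.
config = AutoConfig.from_pretrained("bert-base-uncased")
print(type(config).__name__)  # BertConfig
```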
@@ -226,11 +226,14 @@ from .modeling_xlnet import (
 from .utils import logging

+# Add modeling imports here

 logger = logging.get_logger(__name__)

 MODEL_MAPPING = OrderedDict(
     [
+        # Base model mapping
         (RetriBertConfig, RetriBertModel),
         (T5Config, T5Model),
         (DistilBertConfig, DistilBertModel),

@@ -266,6 +269,7 @@ MODEL_MAPPING = OrderedDict(

 MODEL_FOR_PRETRAINING_MAPPING = OrderedDict(
     [
+        # Model for pre-training mapping
         (LayoutLMConfig, LayoutLMForMaskedLM),
         (RetriBertConfig, RetriBertModel),
         (T5Config, T5ForConditionalGeneration),

@@ -295,6 +299,7 @@ MODEL_FOR_PRETRAINING_MAPPING = OrderedDict(

 MODEL_WITH_LM_HEAD_MAPPING = OrderedDict(
     [
+        # Model with LM heads mapping
         (LayoutLMConfig, LayoutLMForMaskedLM),
         (T5Config, T5ForConditionalGeneration),
         (DistilBertConfig, DistilBertForMaskedLM),

@@ -325,6 +330,7 @@ MODEL_WITH_LM_HEAD_MAPPING = OrderedDict(

 MODEL_FOR_CAUSAL_LM_MAPPING = OrderedDict(
     [
+        # Model for Causal LM mapping
         (CamembertConfig, CamembertForCausalLM),
         (XLMRobertaConfig, XLMRobertaForCausalLM),
         (RobertaConfig, RobertaForCausalLM),

@@ -347,6 +353,7 @@ MODEL_FOR_CAUSAL_LM_MAPPING = OrderedDict(

 MODEL_FOR_MASKED_LM_MAPPING = OrderedDict(
     [
+        # Model for Masked LM mapping
         (LayoutLMConfig, LayoutLMForMaskedLM),
         (DistilBertConfig, DistilBertForMaskedLM),
         (AlbertConfig, AlbertForMaskedLM),

@@ -368,6 +375,7 @@ MODEL_FOR_MASKED_LM_MAPPING = OrderedDict(

 MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING = OrderedDict(
     [
+        # Model for Seq2Seq Causal LM mapping
         (T5Config, T5ForConditionalGeneration),
         (PegasusConfig, PegasusForConditionalGeneration),
         (MarianConfig, MarianMTModel),

@@ -383,6 +391,7 @@ MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING = OrderedDict(

 MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = OrderedDict(
     [
+        # Model for Sequence Classification mapping
         (DistilBertConfig, DistilBertForSequenceClassification),
         (AlbertConfig, AlbertForSequenceClassification),
         (CamembertConfig, CamembertForSequenceClassification),

@@ -407,6 +416,7 @@ MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = OrderedDict(

 MODEL_FOR_QUESTION_ANSWERING_MAPPING = OrderedDict(
     [
+        # Model for Question Answering mapping
         (DistilBertConfig, DistilBertForQuestionAnswering),
         (AlbertConfig, AlbertForQuestionAnswering),
         (CamembertConfig, CamembertForQuestionAnswering),

@@ -429,6 +439,7 @@ MODEL_FOR_QUESTION_ANSWERING_MAPPING = OrderedDict(

 MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = OrderedDict(
     [
+        # Model for Token Classification mapping
         (LayoutLMConfig, LayoutLMForTokenClassification),
         (DistilBertConfig, DistilBertForTokenClassification),
         (CamembertConfig, CamembertForTokenClassification),

@@ -450,6 +461,7 @@ MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = OrderedDict(

 MODEL_FOR_MULTIPLE_CHOICE_MAPPING = OrderedDict(
     [
+        # Model for Multiple Choice mapping
         (CamembertConfig, CamembertForMultipleChoice),
         (ElectraConfig, ElectraForMultipleChoice),
         (XLMRobertaConfig, XLMRobertaForMultipleChoice),
...
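On the modeling side, the config class resolved by `AutoConfig` serves as the key into these mappings. A short sketch with an already-registered model:

```python
from transformers import AutoModelForSequenceClassification

# The resolved DistilBertConfig indexes MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
# yielding DistilBertForSequenceClassification (with a freshly initialized head).
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased")
```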
@@ -169,11 +169,14 @@ from .modeling_tf_xlnet import (
 from .utils import logging

+# Add modeling imports here

 logger = logging.get_logger(__name__)

 TF_MODEL_MAPPING = OrderedDict(
     [
+        # Base model mapping
         (LxmertConfig, TFLxmertModel),
         (T5Config, TFT5Model),
         (DistilBertConfig, TFDistilBertModel),

@@ -200,6 +203,7 @@ TF_MODEL_MAPPING = OrderedDict(

 TF_MODEL_FOR_PRETRAINING_MAPPING = OrderedDict(
     [
+        # Model for pre-training mapping
         (LxmertConfig, TFLxmertForPreTraining),
         (T5Config, TFT5ForConditionalGeneration),
         (DistilBertConfig, TFDistilBertForMaskedLM),

@@ -224,6 +228,7 @@ TF_MODEL_FOR_PRETRAINING_MAPPING = OrderedDict(

 TF_MODEL_WITH_LM_HEAD_MAPPING = OrderedDict(
     [
+        # Model with LM heads mapping
         (T5Config, TFT5ForConditionalGeneration),
         (DistilBertConfig, TFDistilBertForMaskedLM),
         (AlbertConfig, TFAlbertForMaskedLM),

@@ -249,6 +254,7 @@ TF_MODEL_WITH_LM_HEAD_MAPPING = OrderedDict(

 TF_MODEL_FOR_CAUSAL_LM_MAPPING = OrderedDict(
     [
+        # Model for Causal LM mapping
         (BertConfig, TFBertLMHeadModel),
         (OpenAIGPTConfig, TFOpenAIGPTLMHeadModel),
         (GPT2Config, TFGPT2LMHeadModel),

@@ -264,6 +270,7 @@ TF_MODEL_FOR_CAUSAL_LM_MAPPING = OrderedDict(

 TF_MODEL_FOR_MASKED_LM_MAPPING = OrderedDict(
     [
+        # Model for Masked LM mapping
         (DistilBertConfig, TFDistilBertForMaskedLM),
         (AlbertConfig, TFAlbertForMaskedLM),
         (CamembertConfig, TFCamembertForMaskedLM),

@@ -282,6 +289,7 @@ TF_MODEL_FOR_MASKED_LM_MAPPING = OrderedDict(

 TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING = OrderedDict(
     [
+        # Model for Seq2Seq Causal LM mapping
         (T5Config, TFT5ForConditionalGeneration),
         (MarianConfig, TFMarianMTModel),
         (MBartConfig, TFMBartForConditionalGeneration),

@@ -293,6 +301,7 @@ TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING = OrderedDict(

 TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = OrderedDict(
     [
+        # Model for Sequence Classification mapping
         (DistilBertConfig, TFDistilBertForSequenceClassification),
         (AlbertConfig, TFAlbertForSequenceClassification),
         (CamembertConfig, TFCamembertForSequenceClassification),

@@ -310,6 +319,7 @@ TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = OrderedDict(

 TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING = OrderedDict(
     [
+        # Model for Question Answering mapping
         (DistilBertConfig, TFDistilBertForQuestionAnswering),
         (AlbertConfig, TFAlbertForQuestionAnswering),
         (CamembertConfig, TFCamembertForQuestionAnswering),

@@ -328,6 +338,7 @@ TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING = OrderedDict(

 TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = OrderedDict(
     [
+        # Model for Token Classification mapping
         (DistilBertConfig, TFDistilBertForTokenClassification),
         (AlbertConfig, TFAlbertForTokenClassification),
         (CamembertConfig, TFCamembertForTokenClassification),

@@ -345,6 +356,7 @@ TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = OrderedDict(

 TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING = OrderedDict(
     [
+        # Model for Multiple Choice mapping
         (CamembertConfig, TFCamembertForMultipleChoice),
         (XLMConfig, TFXLMForMultipleChoice),
         (XLMRobertaConfig, TFXLMRobertaForMultipleChoice),
...
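The TensorFlow mappings mirror the PyTorch ones, so the generated TF classes become reachable through the TF auto classes in the same way. A minimal sketch:

```python
from transformers import TFAutoModel

# The resolved config class indexes TF_MODEL_MAPPING to pick the TF base model class.
model = TFAutoModel.from_pretrained("bert-base-uncased")
```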
# Using `cookiecutter` to generate models

This folder contains templates to generate new models that fit the current API and pass all tests. It generates
models in both PyTorch and TensorFlow, completes the `__init__.py` and auto-modeling files, and creates the
documentation.

## Usage

Using the `cookiecutter` utility requires having all the `dev` dependencies installed. Let's first clone the
repository and install it in our environment:

```shell script
git clone https://github.com/huggingface/transformers
cd transformers
pip install -e ".[dev]"
```

Once the installation is done, you can use the CLI command `add-new-model` to generate your models:

```shell script
transformers-cli add-new-model
```

This launches the `cookiecutter` package, which prompts you to fill in the configuration.

The `modelname` should be cased according to the plain text casing, i.e., BERT, RoBERTa, DeBERTa.
```
modelname [<ModelNAME>]:
uppercase_modelname [<MODEL_NAME>]:
lowercase_modelname [<model_name>]:
camelcase_modelname [<ModelName>]:
```

Fill in the `authors` with your team members:
```
authors [The HuggingFace Team]:
```

The checkpoint identifier is the checkpoint that will be used in the examples across the files. Put the name you wish,
as it will appear on the model hub. Do not forget to include the organisation.
```
checkpoint_identifier [organisation/<model_name>-base-cased]:
```

The tokenizer should either be based on BERT if it behaves exactly like the BERT tokenizer, or be a standalone
tokenizer otherwise.
```
Select tokenizer_type:
1 - Based on BERT
2 - Standalone
Choose from 1, 2 [1]:
```
<!---
Choose if your model is an encoder-decoder, or an encoder-only architecture.

If your model is an encoder-only architecture, the generated architecture will be based on the BERT model.
If your model is an encoder-decoder architecture, the generated architecture will be based on the BART model. You can,
of course, edit the files once the generation is complete.
```
Select is_encoder_decoder_model:
1 - True
2 - False
Choose from 1, 2 [1]:
```
-->

Once the command has finished, you should have a total of 7 new files spread across the repository:
```
docs/source/model_doc/<model_name>.rst
src/transformers/configuration_<model_name>.py
src/transformers/modeling_<model_name>.py
src/transformers/modeling_tf_<model_name>.py
src/transformers/tokenization_<model_name>.py
tests/test_modeling_<model_name>.py
tests/test_modeling_tf_<model_name>.py
```

You can run the tests to ensure that they all pass:

```
python -m pytest ./tests/test_*<model_name>*.py
```

Feel free to modify each file to mimic the behavior of your model. Run `make fix-copies` to update
`docs/source/index.rst` with your changes.

⚠ You should be careful about the classes preceded by the following line:

```python
# Copied from transformers.[...]
```

This line ensures that the copy does not diverge from the source. If it *should* diverge, because the implementation
is different, this line needs to be deleted. If you don't delete this line and run `make fix-copies`,
your changes will be overwritten.

Once you have edited the files to fit your architecture, simply re-run the tests (and edit them if a change
is needed!) afterwards to make sure everything works as expected.

Once the files are generated and you are happy with your changes, here's a checklist to ensure that your contribution
will be merged quickly:

- You should run the `make fixup` utility to fix the style of the files and to ensure the code quality meets the
  library's standards.
- You should complete the documentation file (`docs/source/model_doc/<model_name>.rst`) so that your model may be
  usable.
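As an illustration of the `# Copied from` mechanism described above, here is a hypothetical class carrying the marker (the model name `BrandNewModel` is a placeholder; the referenced BERT module is real). `make fix-copies` keeps the body in sync with `BertSelfOutput`, applying the `Bert->BrandNewModel` renaming:

```python
from torch import nn


# Copied from transformers.modeling_bert.BertSelfOutput with Bert->BrandNewModel
class BrandNewModelSelfOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, input_tensor):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states
```

If `BrandNewModelSelfOutput` ever needs to behave differently from its BERT counterpart, the marker line must be removed first, or `make fix-copies` will overwrite the edits.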
# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Convert XXX checkpoint."""
import argparse
import logging
import torch
from transformers import XxxConfig, XxxForPreTraining, load_tf_weights_in_xxx
logging.basicConfig(level=logging.INFO)
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path):
# Initialise PyTorch model
config = XxxConfig.from_json_file(config_file)
print("Building PyTorch model from configuration: {}".format(str(config)))
model = XxxForPreTraining(config)
# Load weights from tf checkpoint
load_tf_weights_in_xxx(model, config, tf_checkpoint_path)
# Save pytorch-model
print("Save PyTorch model to {}".format(pytorch_dump_path))
torch.save(model.state_dict(), pytorch_dump_path)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
# Required parameters
parser.add_argument(
"--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path."
)
parser.add_argument(
"--config_file",
default=None,
type=str,
required=True,
help="The config json file corresponding to the pre-trained model. \n"
"This specifies the model architecture.",
)
parser.add_argument(
"--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model."
)
args = parser.parse_args()
convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.config_file, args.pytorch_dump_path)
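For completeness, a hypothetical programmatic invocation of the conversion function above, bypassing argparse (all paths are placeholders):

```python
# Hypothetical paths; point these at a real TF checkpoint, its config, and an output file.
convert_tf_checkpoint_to_pytorch(
    tf_checkpoint_path="/tmp/xxx/model.ckpt",
    config_file="/tmp/xxx/config.json",
    pytorch_dump_path="/tmp/xxx/pytorch_model.bin",
)
```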
{
"modelname": "{{cookiecutter.modelname}}",
"uppercase_modelname": "{{cookiecutter.uppercase_modelname}}",
"lowercase_modelname": "{{cookiecutter.lowercase_modelname}}",
"camelcase_modelname": "{{cookiecutter.camelcase_modelname}}",
"authors": "{{cookiecutter.authors}}",
"checkpoint_identifier": "{{cookiecutter.checkpoint_identifier}}",
"tokenizer_type": "{{cookiecutter.tokenizer_type}}",
"generate_tensorflow_and_pytorch": "{{cookiecutter.generate_tensorflow_and_pytorch}}"
}
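The `--testing_file` JSON files used in the CI workflow above supply this cookiecutter context non-interactively (they are loaded and passed as `extra_context` in `AddNewModelCommand.run`). A hypothetical example of the expected key/value shape, written here as a Python dict (every value is invented):

```python
# Keys mirror cookiecutter.json; values would normally come from the interactive prompts.
testing_configuration = {
    "modelname": "BrandNewModel",
    "uppercase_modelname": "BRAND_NEW_MODEL",
    "lowercase_modelname": "brand_new_model",
    "camelcase_modelname": "BrandNewModel",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "organisation/brand-new-model-base-cased",
    "tokenizer_type": "Based on BERT",
    "generate_tensorflow_and_pytorch": "PyTorch & TensorFlow",
}
```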
# coding=utf-8
# Copyright {{cookiecutter.authors}} and The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,97 +12,118 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" {{cookiecutter.modelname}} model configuration """

from .configuration_utils import PretrainedConfig
from .utils import logging


logger = logging.get_logger(__name__)

{{cookiecutter.uppercase_modelname}}_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/config.json",
    # See all {{cookiecutter.modelname}} models at https://huggingface.co/models?filter={{cookiecutter.lowercase_modelname}}
}


class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a :class:`~transformers.{{cookiecutter.camelcase_modelname}}Model`.
    It is used to instantiate an {{cookiecutter.modelname}} model according to the specified arguments, defining the model
    architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
    the {{cookiecutter.modelname}} `{{cookiecutter.checkpoint_identifier}} <https://huggingface.co/{{cookiecutter.checkpoint_identifier}}>`__ architecture.

    Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used
    to control the model outputs. Read the documentation from :class:`~transformers.PretrainedConfig`
    for more information.

    Args:
        vocab_size (:obj:`int`, `optional`, defaults to 30522):
            Vocabulary size of the {{cookiecutter.modelname}} model. Defines the number of different tokens that can be represented by the
            :obj:`inputs_ids` passed when calling :class:`~transformers.{{cookiecutter.camelcase_modelname}}Model` or
            :class:`~transformers.TF{{cookiecutter.camelcase_modelname}}Model`.
        hidden_size (:obj:`int`, `optional`, defaults to 768):
            Dimensionality of the encoder layers and the pooler layer.
        num_hidden_layers (:obj:`int`, `optional`, defaults to 12):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (:obj:`int`, `optional`, defaults to 12):
            Number of attention heads for each attention layer in the Transformer encoder.
        intermediate_size (:obj:`int`, `optional`, defaults to 3072):
            Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
        hidden_act (:obj:`str` or :obj:`function`, `optional`, defaults to :obj:`"gelu"`):
            The non-linear activation function (function or string) in the encoder and pooler.
            If string, :obj:`"gelu"`, :obj:`"relu"`, :obj:`"swish"` and :obj:`"gelu_new"` are supported.
        hidden_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        attention_probs_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
            The dropout ratio for the attention probabilities.
        max_position_embeddings (:obj:`int`, `optional`, defaults to 512):
            The maximum sequence length that this model might ever be used with.
            Typically set this to something large just in case (e.g., 512 or 1024 or 2048).
        type_vocab_size (:obj:`int`, `optional`, defaults to 2):
            The vocabulary size of the :obj:`token_type_ids` passed when calling :class:`~transformers.{{cookiecutter.camelcase_modelname}}Model` or
            :class:`~transformers.TF{{cookiecutter.camelcase_modelname}}Model`.
        initializer_range (:obj:`float`, `optional`, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (:obj:`float`, `optional`, defaults to 1e-12):
            The epsilon used by the layer normalization layers.

    Example::

        >>> from transformers import {{cookiecutter.camelcase_modelname}}Model, {{cookiecutter.camelcase_modelname}}Config

        >>> # Initializing a {{cookiecutter.modelname}} {{cookiecutter.checkpoint_identifier}} style configuration
        >>> configuration = {{cookiecutter.camelcase_modelname}}Config()

        >>> # Initializing a model from the {{cookiecutter.checkpoint_identifier}} style configuration
        >>> model = {{cookiecutter.camelcase_modelname}}Model(configuration)

        >>> # Accessing the model configuration
        >>> configuration = model.config
    """
    model_type = "{{cookiecutter.lowercase_modelname}}"

    def __init__(
        self,
        vocab_size=30522,
        hidden_size=768,
        is_encoder_decoder=False,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=1,
        bos_token_id=0,
        eos_token_id=2,
        **kwargs
    ):
        super().__init__(
            pad_token_id=pad_token_id,
            is_encoder_decoder=is_encoder_decoder,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs
        )

        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import {{cookiecutter.camelcase_modelname}}Config, is_tf_available
from transformers.testing_utils import require_tf, slow
from .test_configuration_common import ConfigTester
from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
if is_tf_available():
    import tensorflow as tf

    from transformers.modeling_tf_{{cookiecutter.lowercase_modelname}} import (
        TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
        TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
        TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
        TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
        TF{{cookiecutter.camelcase_modelname}}ForTokenClassification,
        TF{{cookiecutter.camelcase_modelname}}Model,
    )
class TF{{cookiecutter.camelcase_modelname}}ModelTester:
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
):
        # Store the constructor arguments rather than re-hardcoding the defaults.
        self.parent = parent
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.is_training = is_training
        self.use_input_mask = use_input_mask
        self.use_token_type_ids = use_token_type_ids
        self.use_labels = use_labels
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.type_sequence_label_size = type_sequence_label_size
        self.initializer_range = initializer_range
        self.num_labels = num_labels
        self.num_choices = num_choices
        self.scope = scope
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_mask = None
if self.use_input_mask:
input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
token_type_ids = None
if self.use_token_type_ids:
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
sequence_labels = None
token_labels = None
choice_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = {{cookiecutter.camelcase_modelname}}Config(
vocab_size=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
intermediate_size=self.intermediate_size,
hidden_act=self.hidden_act,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range,
return_dict=True,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def create_and_check_model(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
        model = TF{{cookiecutter.camelcase_modelname}}Model(config=config)
        # Exercise the three supported input formats: dict, list, and plain tensor.
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        result = model(inputs)

        inputs = [input_ids, input_mask]
        result = model(inputs)

        result = model(input_ids)
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
def create_and_check_for_masked_lm(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TF{{cookiecutter.camelcase_modelname}}ForMaskedLM(config=config)
inputs = {
"input_ids": input_ids,
"attention_mask": input_mask,
"token_type_ids": token_type_ids,
}
result = model(inputs)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
def create_and_check_for_sequence_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification(config=config)
inputs = {
"input_ids": input_ids,
"attention_mask": input_mask,
"token_type_ids": token_type_ids,
}
result = model(inputs)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
def create_and_check_for_multiple_choice(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_choices = self.num_choices
model = TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice(config=config)
multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1))
multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))
inputs = {
"input_ids": multiple_choice_inputs_ids,
"attention_mask": multiple_choice_input_mask,
"token_type_ids": multiple_choice_token_type_ids,
}
result = model(inputs)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))
def create_and_check_for_token_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = TF{{cookiecutter.camelcase_modelname}}ForTokenClassification(config=config)
inputs = {
"input_ids": input_ids,
"attention_mask": input_mask,
"token_type_ids": token_type_ids,
}
result = model(inputs)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))
def create_and_check_for_question_answering(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering(config=config)
inputs = {
"input_ids": input_ids,
"attention_mask": input_mask,
"token_type_ids": token_type_ids,
}
result = model(inputs)
self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict
@require_tf
class TF{{cookiecutter.camelcase_modelname}}ModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
TF{{cookiecutter.camelcase_modelname}}Model,
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
TF{{cookiecutter.camelcase_modelname}}ForTokenClassification,
TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
)
if is_tf_available()
else ()
)
def setUp(self):
self.model_tester = TF{{cookiecutter.camelcase_modelname}}ModelTester(self)
self.config_tester = ConfigTester(self, config_class={{cookiecutter.camelcase_modelname}}Config, hidden_size=37)
def test_config(self):
self.config_tester.run_common_tests()
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_for_masked_lm(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)
def test_for_multiple_choice(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)
def test_for_question_answering(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_question_answering(*config_and_inputs)
def test_for_sequence_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_sequence_classification(*config_and_inputs)
def test_for_token_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_token_classification(*config_and_inputs)
@slow
def test_model_from_pretrained(self):
model = TF{{cookiecutter.camelcase_modelname}}Model.from_pretrained("{{cookiecutter.checkpoint_identifier}}")
self.assertIsNotNone(model)
 # coding=utf-8
-# Copyright 2018 XXX Authors.
+# Copyright 2018 The Google AI Language Team Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,61 +12,56 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+""" Testing suite for the PyTorch {{cookiecutter.modelname}} model. """

 import unittest

 from transformers import is_torch_available
-from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
+from transformers.testing_utils import require_torch, slow, torch_device

 from .test_configuration_common import ConfigTester
-from .test_modeling_common import ModelTesterMixin, ids_tensor
+from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask


 if is_torch_available():
     from transformers import (
-        AutoModelForMaskedLM,
-        AutoTokenizer,
-        XxxConfig,
-        XxxForMaskedLM,
-        XxxForMultipleChoice,
-        XxxForQuestionAnswering,
-        XxxForSequenceClassification,
-        XxxForTokenClassification,
-        XxxModel,
+        {{cookiecutter.camelcase_modelname}}Config,
+        {{cookiecutter.camelcase_modelname}}ForMaskedLM,
+        {{cookiecutter.camelcase_modelname}}ForMultipleChoice,
+        {{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
+        {{cookiecutter.camelcase_modelname}}ForSequenceClassification,
+        {{cookiecutter.camelcase_modelname}}ForTokenClassification,
+        {{cookiecutter.camelcase_modelname}}Model,
     )
-    from transformers.file_utils import cached_property
+    from transformers.modeling_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST

-#

-class XxxModelTester:
-    """You can also import this e.g from .test_modeling_bart import BartModelTester """
+class {{cookiecutter.camelcase_modelname}}ModelTester:
     def __init__(
         self,
         parent,
         batch_size=13,
         seq_length=7,
         is_training=True,
         use_input_mask=True,
         use_token_type_ids=True,
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
         num_hidden_layers=5,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
         hidden_dropout_prob=0.1,
         attention_probs_dropout_prob=0.1,
         max_position_embeddings=512,
         type_vocab_size=16,
         type_sequence_label_size=2,
         initializer_range=0.02,
         num_labels=3,
         num_choices=4,
         scope=None,
     ):
         self.parent = parent
         self.batch_size = batch_size
@@ -96,7 +91,7 @@ class XxxModelTester:
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])

         token_type_ids = None
         if self.use_token_type_ids:
@@ -110,7 +105,7 @@ class XxxModelTester:
         token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
         choice_labels = ids_tensor([self.batch_size], self.num_choices)

-        config = XxxConfig(
+        config = {{cookiecutter.camelcase_modelname}}Config(
             vocab_size=self.vocab_size,
             hidden_size=self.hidden_size,
             num_hidden_layers=self.num_hidden_layers,
@@ -121,6 +116,7 @@ class XxxModelTester:
             attention_probs_dropout_prob=self.attention_probs_dropout_prob,
             max_position_embeddings=self.max_position_embeddings,
             type_vocab_size=self.type_vocab_size,
+            is_decoder=False,
             initializer_range=self.initializer_range,
             return_dict=True,
         )
@@ -128,30 +124,29 @@ class XxxModelTester:
         return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

     def create_and_check_model(
         self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
     ):
-        model = XxxModel(config=config)
+        model = {{cookiecutter.camelcase_modelname}}Model(config=config)
         model.to(torch_device)
         model.eval()
         result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
         result = model(input_ids, token_type_ids=token_type_ids)
         result = model(input_ids)
         self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
-        self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.hidden_size))

     def create_and_check_for_masked_lm(
         self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
     ):
-        model = XxxForMaskedLM(config=config)
+        model = {{cookiecutter.camelcase_modelname}}ForMaskedLM(config=config)
         model.to(torch_device)
         model.eval()
         result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
         self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))

     def create_and_check_for_question_answering(
         self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
     ):
-        model = XxxForQuestionAnswering(config=config)
+        model = {{cookiecutter.camelcase_modelname}}ForQuestionAnswering(config=config)
         model.to(torch_device)
         model.eval()
         result = model(
@@ -165,30 +160,30 @@ class XxxModelTester:
         self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))

     def create_and_check_for_sequence_classification(
         self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
     ):
         config.num_labels = self.num_labels
-        model = XxxForSequenceClassification(config)
+        model = {{cookiecutter.camelcase_modelname}}ForSequenceClassification(config)
         model.to(torch_device)
         model.eval()
         result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels)
         self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))

     def create_and_check_for_token_classification(
         self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
     ):
         config.num_labels = self.num_labels
-        model = XxxForTokenClassification(config=config)
+        model = {{cookiecutter.camelcase_modelname}}ForTokenClassification(config=config)
         model.to(torch_device)
         model.eval()
         result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
         self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))

     def create_and_check_for_multiple_choice(
         self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
     ):
         config.num_choices = self.num_choices
-        model = XxxForMultipleChoice(config=config)
+        model = {{cookiecutter.camelcase_modelname}}ForMultipleChoice(config=config)
         model.to(torch_device)
         model.eval()
         multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
@@ -218,17 +213,24 @@ class XxxModelTester:
 @require_torch
-class XxxModelTest(ModelTesterMixin, unittest.TestCase):
+class {{cookiecutter.camelcase_modelname}}ModelTest(ModelTesterMixin, unittest.TestCase):

     all_model_classes = (
-        (XxxModel, XxxForMaskedLM, XxxForQuestionAnswering, XxxForSequenceClassification, XxxForTokenClassification)
+        (
+            {{cookiecutter.camelcase_modelname}}Model,
+            {{cookiecutter.camelcase_modelname}}ForMaskedLM,
+            {{cookiecutter.camelcase_modelname}}ForMultipleChoice,
+            {{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
+            {{cookiecutter.camelcase_modelname}}ForSequenceClassification,
+            {{cookiecutter.camelcase_modelname}}ForTokenClassification,
+        )
         if is_torch_available()
         else ()
     )

     def setUp(self):
-        self.model_tester = XxxModelTester(self)
-        self.config_tester = ConfigTester(self, config_class=XxxConfig, hidden_size=37)
+        self.model_tester = {{cookiecutter.camelcase_modelname}}ModelTester(self)
+        self.config_tester = ConfigTester(self, config_class={{cookiecutter.camelcase_modelname}}Config, hidden_size=37)

     def test_config(self):
         self.config_tester.run_common_tests()
@@ -241,6 +243,10 @@ class XxxModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)

+    def test_for_multiple_choice(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)
+
     def test_for_question_answering(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_question_answering(*config_and_inputs)
@@ -253,55 +259,10 @@ class XxxModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_token_classification(*config_and_inputs)

-    def test_for_multiple_choice(self):
-        config_and_inputs = self.model_tester.prepare_config_and_inputs()
-        self.model_tester.create_and_check_electra_for_multiple_choice(*config_and_inputs)
-
     @slow
-    def test_lm_outputs_same_as_reference_model(self):
-        """Write something that could help someone fixing this here."""
-        checkpoint_path = "XXX/bart-large"
-        model = self.big_model
-        tokenizer = AutoTokenizer.from_pretrained(
-            checkpoint_path
-        )  # same with AutoTokenizer (see tokenization_auto.py). This is not mandatory
-        # MODIFY THIS DEPENDING ON YOUR MODELS RELEVANT TASK.
-        batch = tokenizer(["I went to the <mask> yesterday"]).to(torch_device)
-        desired_mask_result = tokenizer.decode("store")  # update this
-        logits = model(**batch).logits
-        masked_index = (batch.input_ids == self.tokenizer.mask_token_id).nonzero()
-        assert model.num_parameters() == 175e9  # a joke
-        mask_entry_logits = logits[0, masked_index.item(), :]
-        probs = mask_entry_logits.softmax(dim=0)
-        _, predictions = probs.topk(1)
-        self.assertEqual(tokenizer.decode(predictions), desired_mask_result)
-
-    @cached_property
-    def big_model(self):
-        """Cached property means this code will only be executed once."""
-        checkpoint_path = "XXX/bart-large"
-        model = AutoModelForMaskedLM.from_pretrained(checkpoint_path).to(
-            torch_device
-        )  # test whether AutoModel can determine your model_class from checkpoint name
-        if torch_device == "cuda":
-            model.half()
-
-    # optional: do more testing! This will save you time later!
-    @slow
-    def test_that_XXX_can_be_used_in_a_pipeline(self):
-        """We can use self.big_model here without calling __init__ again."""
-        pass
-
-    def test_XXX_loss_doesnt_change_if_you_add_padding(self):
-        pass
-
-    def test_XXX_bad_args(self):
-        pass
-
-    def test_XXX_backward_pass_reduces_loss(self):
-        """Test loss/gradients same as reference implementation, for example."""
-        pass
-
-    @require_torch_gpu
-    def test_large_inputs_in_fp16_dont_cause_overflow(self):
-        pass
+    def test_model_from_pretrained(self):
+        for model_name in {{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
+            model = {{cookiecutter.camelcase_modelname}}Model.from_pretrained(model_name)
+            self.assertIsNotNone(model)
## This file is made so that specific statements may be copied inside existing files. This is useful to copy
## import statements in __init__.py, or to complete model lists in the AUTO files.
##
## It is to be used as follows:
## Put '# To replace in: "FILE_PATH"' to indicate that the content below will be copied into the file at path FILE_PATH.
## Put '# Below: "STATEMENT"' to copy the content below **the first occurrence** of that line in the file at FILE_PATH.
## Put '# Replace with:' followed by the lines to insert.
## End a statement with '# End.'. If a new statement starts without redefining the FILE_PATH, the content keeps being
## pasted into that same file.
##
## Put '## COMMENT' to comment on the file.
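## For orientation, a minimal sketch of how these directives could be consumed. This is a hypothetical
## illustration only -- the real logic lives inside `transformers-cli add-new-model` -- and it ignores the
## conditional suffixes such as 'if generating PyTorch' for brevity; `apply_directives` and `_paste_below`
## are made-up helper names, not part of the library.

import os

def apply_directives(directive_text, repo_root="."):
    # Walk the directive file line by line, collecting replacement lines between
    # '# Replace with:' and '# End.', then paste them below the first occurrence
    # of the current anchor in the current target file.
    file_path = anchor = None
    replacement, collecting = [], False
    for line in directive_text.splitlines():
        if line.startswith('# To replace in: "'):
            file_path = line.split('"')[1]
        elif line.startswith('# Below: "'):
            anchor = line.split('"')[1]
        elif line.startswith("# Replace with:"):
            collecting, replacement = True, []
        elif line.startswith("# End."):
            collecting = False
            _paste_below(os.path.join(repo_root, file_path), anchor, replacement)
        elif collecting and not line.startswith("##"):
            replacement.append(line)

def _paste_below(path, anchor, new_lines):
    # Insert new_lines right below the first line of `path` containing `anchor`.
    with open(path) as f:
        lines = f.read().splitlines()
    idx = next(i for i, candidate in enumerate(lines) if anchor in candidate)
    lines[idx + 1 : idx + 1] = new_lines
    with open(path, "w") as f:
        f.write("\n".join(lines) + "\n")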
# To replace in: "src/transformers/__init__.py"
# Below: "if is_torch_available():" if generating PyTorch
# Replace with:
from .modeling_{{cookiecutter.lowercase_modelname}} import (
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
{{cookiecutter.camelcase_modelname}}ForMaskedLM,
{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
{{cookiecutter.camelcase_modelname}}ForTokenClassification,
{{cookiecutter.camelcase_modelname}}Layer,
{{cookiecutter.camelcase_modelname}}Model,
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
load_tf_weights_in_{{cookiecutter.lowercase_modelname}},
)
# End.
# Below: "if is_tf_available():" if generating TensorFlow
# Replace with:
from .modeling_tf_{{cookiecutter.lowercase_modelname}} import (
TF_{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
TF{{cookiecutter.camelcase_modelname}}ForTokenClassification,
TF{{cookiecutter.camelcase_modelname}}Layer,
TF{{cookiecutter.camelcase_modelname}}Model,
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
)
# End.
# Below: "from .configuration_albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, AlbertConfig"
# Replace with:
from .configuration_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.uppercase_modelname}}_PRETRAINED_CONFIG_ARCHIVE_MAP, {{cookiecutter.camelcase_modelname}}Config
# End.
# To replace in: "src/transformers/configuration_auto.py"
# Below: "# Add configs here"
# Replace with:
("{{cookiecutter.lowercase_modelname}}", {{cookiecutter.camelcase_modelname}}Config),
# End.
# Below: "# Add archive maps here"
# Replace with:
{{cookiecutter.uppercase_modelname}}_PRETRAINED_CONFIG_ARCHIVE_MAP,
# End.
# Below: "from .configuration_albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, AlbertConfig",
# Replace with:
from .configuration_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.uppercase_modelname}}_PRETRAINED_CONFIG_ARCHIVE_MAP, {{cookiecutter.camelcase_modelname}}Config
# End.
# Below: "# Add full (and cased) model names here"
# Replace with:
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}"),
# End.
# To replace in: "src/transformers/modeling_auto.py" if generating PyTorch
# Below: "from .configuration_auto import ("
# Replace with:
{{cookiecutter.camelcase_modelname}}Config,
# End.
# Below: "# Add modeling imports here"
# Replace with:
from .modeling_{{cookiecutter.lowercase_modelname}} import (
{{cookiecutter.camelcase_modelname}}ForMaskedLM,
{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
{{cookiecutter.camelcase_modelname}}ForTokenClassification,
{{cookiecutter.camelcase_modelname}}Model,
)
# End.
# Below: "# Base model mapping"
# Replace with:
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}Model),
# End.
# Below: "# Model with LM heads mapping"
# Replace with:
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForMaskedLM),
# End.
# Below: "# Model for Masked LM mapping"
# Replace with:
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForMaskedLM),
# End.
# Below: "# Model for Sequence Classification mapping"
# Replace with:
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForSequenceClassification),
# End.
# Below: "# Model for Question Answering mapping"
# Replace with:
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForQuestionAnswering),
# End.
# Below: "# Model for Token Classification mapping"
# Replace with:
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForTokenClassification),
# End.
# Below: "# Model for Multiple Choice mapping"
# Replace with:
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForMultipleChoice),
# End.
# To replace in: "src/transformers/modeling_tf_auto.py" if generating TensorFlow
# Below: "from .configuration_auto import ("
# Replace with:
{{cookiecutter.camelcase_modelname}}Config,
# End.
# Below: "# Add modeling imports here"
# Replace with:
from .modeling_tf_{{cookiecutter.lowercase_modelname}} import (
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
TF{{cookiecutter.camelcase_modelname}}ForTokenClassification,
TF{{cookiecutter.camelcase_modelname}}Model,
)
# End.
# Below: "# Base model mapping"
# Replace with:
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}Model),
# End.
# Below: "# Model with LM heads mapping"
# Replace with:
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForMaskedLM),
# End.
# Below: "# Model for Masked LM mapping"
# Replace with:
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForMaskedLM),
# End.
# Below: "# Model for Sequence Classification mapping"
# Replace with:
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification),
# End.
# Below: "# Model for Question Answering mapping"
# Replace with:
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering),
# End.
# Below: "# Model for Token Classification mapping"
# Replace with:
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForTokenClassification),
# End.
# Below: "# Model for Multiple Choice mapping"
# Replace with:
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice),
# End.
{{cookiecutter.uppercase_modelname}}
-----------------------------------------------------------------------------------------------------------------------

Overview
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The {{cookiecutter.modelname}} model was proposed in `<INSERT PAPER NAME HERE>
<<INSERT PAPER LINK HERE>>`__ by <INSERT AUTHORS HERE>. <INSERT SHORT SUMMARY HERE>

The abstract from the paper is the following:

*<INSERT PAPER ABSTRACT HERE>*

Tips:

<INSERT TIPS ABOUT MODEL HERE>

{{cookiecutter.camelcase_modelname}}Config
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}Config
    :members:


{{cookiecutter.camelcase_modelname}}Tokenizer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}Tokenizer
    :members: build_inputs_with_special_tokens, get_special_tokens_mask,
        create_token_type_ids_from_sequences, save_vocabulary


{{cookiecutter.camelcase_modelname}}TokenizerFast
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}TokenizerFast
    :members: build_inputs_with_special_tokens, get_special_tokens_mask,
        create_token_type_ids_from_sequences, save_vocabulary


{% if "PyTorch" in cookiecutter.generate_tensorflow_and_pytorch -%}
{{cookiecutter.camelcase_modelname}}Model
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}Model
    :members: forward


{{cookiecutter.camelcase_modelname}}ForMaskedLM
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForMaskedLM
    :members: forward


{{cookiecutter.camelcase_modelname}}ForSequenceClassification
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForSequenceClassification
    :members: forward


{{cookiecutter.camelcase_modelname}}ForMultipleChoice
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForMultipleChoice
    :members:


{{cookiecutter.camelcase_modelname}}ForTokenClassification
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForTokenClassification
    :members: forward


{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
    :members: forward

{% endif -%}

{% if "TensorFlow" in cookiecutter.generate_tensorflow_and_pytorch -%}
TF{{cookiecutter.camelcase_modelname}}Model
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}Model
    :members: call


TF{{cookiecutter.camelcase_modelname}}ForMaskedLM
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}ForMaskedLM
    :members: call


TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification
    :members: call


TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice
    :members: call


TF{{cookiecutter.camelcase_modelname}}ForTokenClassification
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}ForTokenClassification
    :members: call


TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
    :members: call

{% endif -%}
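Note the substring test in the Jinja guards above: `generate_tensorflow_and_pytorch` resolves to a single string such as "PyTorch & TensorFlow", so `{% if "PyTorch" in ... %}` is true for both the combined choice and the PyTorch-only choice. A quick self-contained illustration, using `jinja2` directly (the templating engine cookiecutter builds on); the variable name `gen` is ours:

from jinja2 import Template

# "PyTorch" is a substring of "PyTorch & TensorFlow", so both guards fire.
template = Template('{% if "PyTorch" in gen %}PT docs {% endif %}{% if "TensorFlow" in gen %}TF docs{% endif %}')
print(template.render(gen="PyTorch & TensorFlow"))  # -> PT docs TF docs
print(template.render(gen="TensorFlow"))            # -> TF docs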
{
    "modelname": "BrandNewBERT",
    "uppercase_modelname": "BRAND_NEW_BERT",
    "lowercase_modelname": "brand_new_bert",
    "camelcase_modelname": "BrandNewBert",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "brand-new-bert-base-cased",
    "tokenizer_type": ["Based on BERT", "Standalone"],
    "generate_tensorflow_and_pytorch": ["PyTorch & TensorFlow", "PyTorch", "TensorFlow"]
}
\ No newline at end of file
{
    "modelname": "EncoderBERT",
    "uppercase_modelname": "ENCODER_BERT",
    "lowercase_modelname": "encoder_bert",
    "camelcase_modelname": "EncoderBert",
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "brand-new-bert-base-cased",
    "tokenizer_type": "Based on BERT",
    "generate_tensorflow_and_pytorch": "PyTorch & TensorFlow"
}
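Of the two JSON files above, the first (the cookiecutter context) declares the template variables: list-valued fields such as "tokenizer_type" become multiple-choice prompts. Test fixtures like the second file pin every field to a single value so CI can generate models non-interactively. A minimal sketch of rendering the template from such a fixture via cookiecutter's Python API (paths are illustrative; the supported entry point is `transformers-cli add-new-model --testing`):

import json

from cookiecutter.main import cookiecutter

# Load the fixture's pinned choices and pass them in as overrides;
# no_input=True suppresses the interactive prompts.
with open("templates/cookiecutter/tests/encoder-bert-tokenizer.json") as f:
    context = json.load(f)

cookiecutter("templates/cookiecutter", no_input=True, extra_context=context)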