Unverified Commit 1310e1a7 authored by Sylvain Gugger, committed by GitHub

Enforce all objects in the main init are documented (#9014)

parent 51e81e58
@@ -32,6 +32,9 @@ generation.

.. autoclass:: transformers.LogitsProcessorList
    :members: __call__

.. autoclass:: transformers.LogitsWarper
    :members: __call__

.. autoclass:: transformers.MinLengthLogitsProcessor
    :members: __call__

@@ -67,3 +70,10 @@ BeamSearch

.. autoclass:: transformers.BeamSearchScorer
    :members: process, finalize
Utilities
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: transformers.top_k_top_p_filtering
.. autofunction:: transformers.tf_top_k_top_p_filtering
@@ -22,6 +22,8 @@ Utilities

.. autoclass:: transformers.EvalPrediction

.. autoclass:: transformers.EvaluationStrategy

.. autofunction:: transformers.set_seed

.. autofunction:: transformers.torch_distributed_zero_first

@@ -32,8 +34,15 @@ Callbacks internals

.. autoclass:: transformers.trainer_callback.CallbackHandler

Distributed Evaluation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.trainer_pt_utils.DistributedTensorGatherer
    :members:
HfArgumentParser
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.HfArgumentParser
@@ -74,6 +74,10 @@ Learning Rate Schedules (Pytorch)
    :target: /imgs/warmup_linear_schedule.png
    :alt:

.. autofunction:: transformers.get_polynomial_decay_schedule_with_warmup

Warmup (TensorFlow)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
...
@@ -73,8 +73,9 @@ FillMaskPipeline

NerPipeline
=======================================================================================================================

.. autoclass:: transformers.NerPipeline

See :class:`~transformers.TokenClassificationPipeline` for all details.

QuestionAnsweringPipeline
=======================================================================================================================
@@ -118,6 +119,13 @@ TokenClassificationPipeline
    :special-members: __call__
    :members:

TranslationPipeline
=======================================================================================================================

.. autoclass:: transformers.TranslationPipeline
    :special-members: __call__
    :members:

ZeroShotClassificationPipeline
=======================================================================================================================
...
@@ -60,6 +60,13 @@ AlbertTokenizer
    create_token_type_ids_from_sequences, save_vocabulary

AlbertTokenizerFast
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.AlbertTokenizerFast
    :members:

Albert specific outputs
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...
@@ -175,3 +175,10 @@ TFAutoModelForQuestionAnswering

.. autoclass:: transformers.TFAutoModelForQuestionAnswering
    :members:

FlaxAutoModel
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.FlaxAutoModel
    :members:
@@ -98,6 +98,12 @@ BartTokenizer
    :members:

BartTokenizerFast
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.BartTokenizerFast
    :members:

BartModel
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...
@@ -51,3 +51,9 @@ BarthezTokenizer

.. autoclass:: transformers.BarthezTokenizer
    :members:

BarthezTokenizerFast
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.BarthezTokenizerFast
    :members:
@@ -207,3 +207,10 @@ FlaxBertModel

.. autoclass:: transformers.FlaxBertModel
    :members: __call__

FlaxBertForMaskedLM
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.FlaxBertForMaskedLM
    :members: __call__
@@ -54,6 +54,13 @@ CamembertTokenizer
    create_token_type_ids_from_sequences, save_vocabulary

CamembertTokenizerFast
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.CamembertTokenizerFast
    :members:

CamembertModel
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...
@@ -57,6 +57,13 @@ LayoutLMTokenizer
    :members:

LayoutLMTokenizerFast
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.LayoutLMTokenizerFast
    :members:

LayoutLMModel
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...
@@ -90,6 +90,13 @@ MBartTokenizer
    :members: build_inputs_with_special_tokens, prepare_seq2seq_batch

MBartTokenizerFast
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.MBartTokenizerFast
    :members:

MBartForConditionalGeneration
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...
@@ -37,6 +37,22 @@ MT5Config
    :members:

MT5Tokenizer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.MT5Tokenizer

See :class:`~transformers.T5Tokenizer` for all details.

MT5TokenizerFast
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.MT5TokenizerFast

See :class:`~transformers.T5TokenizerFast` for all details.

MT5Model
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...
@@ -112,6 +112,13 @@ warning: ``add_tokens`` does not work at the moment.
    :members: __call__, prepare_seq2seq_batch

PegasusTokenizerFast
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.PegasusTokenizerFast
    :members:

PegasusForConditionalGeneration
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...
@@ -163,6 +163,13 @@ ReformerTokenizer
    :members: save_vocabulary

ReformerTokenizerFast
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.ReformerTokenizerFast
    :members:

ReformerModel
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...
@@ -107,6 +107,13 @@ T5Tokenizer
    create_token_type_ids_from_sequences, prepare_seq2seq_batch, save_vocabulary

T5TokenizerFast
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.T5TokenizerFast
    :members:

T5Model
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...
@@ -105,3 +105,11 @@ TFTransfoXLLMHeadModel

.. autoclass:: transformers.TFTransfoXLLMHeadModel
    :members: call

Internal Layers
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.AdaptiveEmbedding

.. autoclass:: transformers.TFAdaptiveEmbedding
@@ -62,6 +62,13 @@ XLMRobertaTokenizer
    create_token_type_ids_from_sequences, save_vocabulary

XLMRobertaTokenizerFast
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.XLMRobertaTokenizerFast
    :members:

XLMRobertaModel
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...
@@ -62,6 +62,13 @@ XLNetTokenizer
    create_token_type_ids_from_sequences, save_vocabulary

XLNetTokenizerFast
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.XLNetTokenizerFast
    :members:

XLNet specific outputs
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...
@@ -17,6 +17,7 @@ import importlib
import inspect
import os
import re
from pathlib import Path

# All paths are set with the intent you should run this script from the root of the repo with the command

@@ -57,28 +58,6 @@ TEST_FILES_WITH_NO_COMMON_TESTS = [
    "test_modeling_xlm_roberta.py",
]
# Update this list for models that are not documented with a comment explaining the reason it should not be.
# Being in this list is an exception and should **not** be the rule.
IGNORE_NON_DOCUMENTED = [
    "BartDecoder",  # Building part of bigger (documented) model.
    "BartEncoder",  # Building part of bigger (documented) model.
    "DPREncoder",  # Building part of bigger (documented) model.
    "DPRSpanPredictor",  # Building part of bigger (documented) model.
    "T5Stack",  # Building part of bigger (documented) model.
    "TFDPREncoder",  # Building part of bigger (documented) model.
    "TFDPRSpanPredictor",  # Building part of bigger (documented) model.
]
# Update this dict with any special correspondence between a model name (as used in modeling_xxx.py) and its doc file.
MODEL_NAME_TO_DOC_FILE = {
    "openai": "gpt.rst",
    "transfo_xl": "transformerxl.rst",
    "xlm_prophetnet": "xlmprophetnet.rst",
    "xlm_roberta": "xlmroberta.rst",
    "bert_generation": "bertgeneration.rst",
    "marian": "marian.rst",
}
# Update this list for models that are not in any of the auto MODEL_XXX_MAPPING. Being in this list is an exception and
# should **not** be the rule.
IGNORE_NON_AUTO_CONFIGURED = [

@@ -192,22 +171,6 @@ def get_model_test_files():
    return test_files
# If some doc source files should be ignored when checking models are all documented, they should be added in the
# nested list _ignore_modules of this function.
def get_model_doc_files():
    """ Get the model doc files."""
    _ignore_modules = [
        "auto",
        "dialogpt",
        "retribert",
    ]
    doc_files = []
    for filename in os.listdir(PATH_TO_DOC):
        if os.path.isfile(f"{PATH_TO_DOC}/{filename}") and not os.path.splitext(filename)[0] in _ignore_modules:
            doc_files.append(filename)
    return doc_files
# This is a bit hacky but I didn't find a way to import the test_file as a module and read inside the tester class
# for the all_model_classes variable.
def find_tested_models(test_file):

@@ -269,58 +232,6 @@ def check_all_models_are_tested():
    raise Exception(f"There were {len(failures)} failures:\n" + "\n".join(failures))
def find_documented_classes(doc_file):
    """ Parse the content of doc_file to detect which classes it documents."""
    with open(os.path.join(PATH_TO_DOC, doc_file), "r", encoding="utf-8", newline="\n") as f:
        content = f.read()
    return re.findall(r"autoclass:: transformers.(\S+)\s+", content)
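The regex above can be exercised in isolation; the sketch below runs the same pattern over a small invented RST snippet (the class names are illustrative, not taken from a real doc file):

```python
import re

# Invented RST content in the shape the doc source files use.
sample_rst = """
.. autoclass:: transformers.BartTokenizerFast
    :members:

.. autoclass:: transformers.BartModel
    :members: forward
"""

# Same pattern as find_documented_classes: capture the name that follows
# "autoclass:: transformers." up to the next whitespace.
documented = re.findall(r"autoclass:: transformers.(\S+)\s+", sample_rst)
print(documented)  # ['BartTokenizerFast', 'BartModel']
```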
def check_models_are_documented(module, doc_file):
    """ Check models defined in module are documented in doc_file."""
    defined_models = get_models(module)
    documented_classes = find_documented_classes(doc_file)
    failures = []
    for model_name, _ in defined_models:
        if model_name not in documented_classes and model_name not in IGNORE_NON_DOCUMENTED:
            failures.append(
                f"{model_name} is defined in {module.__name__} but is not documented in "
                + f"{os.path.join(PATH_TO_DOC, doc_file)}. Add it to that file. "
                + "If this model should not be documented, add its name to `IGNORE_NON_DOCUMENTED` "
                + "in the file `utils/check_repo.py`."
            )
    return failures
def _get_model_name(module):
    """ Get the model name for the module defining it."""
    module_name = module.__name__.split(".")[-1]
    splits = module_name.split("_")
    splits = splits[(2 if splits[1] in ["flax", "tf"] else 1) :]
    return "_".join(splits)
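The split logic is easiest to see on concrete module names; a standalone rework (the function name here is mine, chosen for illustration):

```python
def model_name_from_module(module_name: str) -> str:
    # Mirror of _get_model_name: drop the "modeling" prefix and, when present,
    # the "tf"/"flax" framework marker that follows it.
    splits = module_name.split("_")
    splits = splits[(2 if splits[1] in ["flax", "tf"] else 1):]
    return "_".join(splits)

print(model_name_from_module("modeling_bert"))           # bert
print(model_name_from_module("modeling_tf_transfo_xl"))  # transfo_xl
print(model_name_from_module("modeling_flax_bert"))      # bert
```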
def check_all_models_are_documented():
    """ Check all models are properly documented."""
    modules = get_model_modules()
    doc_files = get_model_doc_files()
    failures = []
    for module in modules:
        model_name = _get_model_name(module)
        doc_file = MODEL_NAME_TO_DOC_FILE.get(model_name, f"{model_name}.rst")
        if doc_file not in doc_files:
            failures.append(
                f"{module.__name__} does not have its corresponding doc file {doc_file}. "
                + f"If the doc file exists but isn't named {doc_file}, update `MODEL_NAME_TO_DOC_FILE` "
                + "in the file `utils/check_repo.py`."
            )
        new_failures = check_models_are_documented(module, doc_file)
        if new_failures is not None:
            failures += new_failures
    if len(failures) > 0:
        raise Exception(f"There were {len(failures)} failures:\n" + "\n".join(failures))
def get_all_auto_configured_models():
    """ Return the list of all models in at least one auto class."""
    result = set()  # To avoid duplicates we concatenate all model classes in a set.

@@ -396,13 +307,154 @@ def check_all_decorator_order():
    )
def find_all_documented_objects():
    """ Parse the content of all doc files to detect which classes and functions they document."""
    documented_obj = []
    for doc_file in Path(PATH_TO_DOC).glob("**/*.rst"):
        with open(doc_file) as f:
            content = f.read()
        raw_doc_objs = re.findall(r"(?:autoclass|autofunction):: transformers.(\S+)\s+", content)
        documented_obj += [obj.split(".")[-1] for obj in raw_doc_objs]
    return documented_obj
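The widened pattern now also catches ``autofunction`` directives and fully qualified internal paths, which the ``split(".")[-1]`` step reduces to bare names. A small self-contained check (sample content invented for illustration):

```python
import re

# Invented doc content mixing both directive kinds, including one object
# documented under its full internal path.
content = """
.. autoclass:: transformers.trainer_pt_utils.DistributedTensorGatherer
    :members:

.. autofunction:: transformers.set_seed
"""

raw = re.findall(r"(?:autoclass|autofunction):: transformers.(\S+)\s+", content)
# Reduce qualified paths like trainer_pt_utils.DistributedTensorGatherer
# to the bare object name, as find_all_documented_objects does.
names = [obj.split(".")[-1] for obj in raw]
print(names)  # ['DistributedTensorGatherer', 'set_seed']
```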
# One good reason for not being documented is to be deprecated. Put in this list deprecated objects.
DEPRECATED_OBJECTS = [
    "AutoModelWithLMHead",
    "GlueDataset",
    "GlueDataTrainingArguments",
    "LineByLineTextDataset",
    "LineByLineWithRefDataset",
    "LineByLineWithSOPTextDataset",
    "PretrainedBartModel",
    "PretrainedFSMTModel",
    "SingleSentenceClassificationProcessor",
    "SquadDataTrainingArguments",
    "SquadDataset",
    "SquadExample",
    "SquadFeatures",
    "SquadV1Processor",
    "SquadV2Processor",
    "TFAutoModelWithLMHead",
    "TextDataset",
    "TextDatasetForNextSentencePrediction",
    "glue_compute_metrics",
    "glue_convert_examples_to_features",
    "glue_output_modes",
    "glue_processors",
    "glue_tasks_num_labels",
    "squad_convert_examples_to_features",
    "xnli_compute_metrics",
    "xnli_output_modes",
    "xnli_processors",
    "xnli_tasks_num_labels",
]
# Exceptionally, some objects should not be documented after all rules have passed.
# ONLY PUT SOMETHING IN THIS LIST AS A LAST RESORT!
UNDOCUMENTED_OBJECTS = [
    "AddedToken",  # This is a tokenizers class.
    "BasicTokenizer",  # Internal, should never have been in the main init.
    "DPRPretrainedReader",  # Like an Encoder.
    "ModelCard",  # Internal type.
    "SqueezeBertModule",  # Internal building block (should have been called SqueezeBertLayer)
    "TFDPRPretrainedReader",  # Like an Encoder.
    "TransfoXLCorpus",  # Internal type.
    "WordpieceTokenizer",  # Internal, should never have been in the main init.
    "absl",  # External module
    "add_end_docstrings",  # Internal, should never have been in the main init.
    "add_start_docstrings",  # Internal, should never have been in the main init.
    "cached_path",  # Internal, used for downloading models.
    "convert_tf_weight_name_to_pt_weight_name",  # Internal, used to convert model weights.
    "logger",  # Internal logger
    "logging",  # External module
]
# This list should be empty. Objects in it should get their own doc page.
SHOULD_HAVE_THEIR_OWN_PAGE = [
    # bert-japanese
    "BertJapaneseTokenizer",
    "CharacterTokenizer",
    "MecabTokenizer",
    # Bertweet
    "BertweetTokenizer",
    # Herbert
    "HerbertTokenizer",
    "HerbertTokenizerFast",
    # Phobert
    "PhobertTokenizer",
    # Benchmarks
    "PyTorchBenchmark",
    "PyTorchBenchmarkArguments",
    "TensorFlowBenchmark",
    "TensorFlowBenchmarkArguments",
]
def ignore_undocumented(name):
    """Rules to determine if `name` should be undocumented."""
    # NOT DOCUMENTED ON PURPOSE.
    # Magic attributes are not documented.
    if name.startswith("__"):
        return True
    # Uppercase constants are not documented.
    if name.isupper():
        return True
    # PreTrainedModels / Encoders / Decoders / Layers / Embeddings / Attention are not documented.
    if (
        name.endswith("PreTrainedModel")
        or name.endswith("Decoder")
        or name.endswith("Encoder")
        or name.endswith("Layer")
        or name.endswith("Embeddings")
        or name.endswith("Attention")
    ):
        return True
    # Submodules are not documented.
    if os.path.isdir(os.path.join(PATH_TO_TRANSFORMERS, name)) or os.path.isfile(
        os.path.join(PATH_TO_TRANSFORMERS, f"{name}.py")
    ):
        return True
    # All load functions are not documented.
    if name.startswith("load_tf") or name.startswith("load_pytorch"):
        return True
    # is_xxx_available functions are not documented.
    if name.startswith("is_") and name.endswith("_available"):
        return True
    # Deprecated objects are not documented.
    if name in DEPRECATED_OBJECTS or name in UNDOCUMENTED_OBJECTS:
        return True
    # MMBT model does not really work.
    if name.startswith("MMBT"):
        return True

    # NOT DOCUMENTED BUT NOT ON PURPOSE, SHOULD BE FIXED!
    # All data collators should be documented.
    if name.startswith("DataCollator") or name.endswith("data_collator"):
        return True
    if name in SHOULD_HAVE_THEIR_OWN_PAGE:
        return True
    return False
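The filesystem and registry checks make the full function hard to run outside the repo, but the pure naming rules can be condensed into a standalone sketch (function name mine; the path, deprecation, and data-collator checks are deliberately omitted):

```python
def looks_undocumentable(name: str) -> bool:
    # Condensed version of the naming rules in ignore_undocumented: dunder
    # attributes, uppercase constants, internal building-block suffixes, and
    # weight-loading helpers are all skipped on purpose.
    internal_suffixes = ("PreTrainedModel", "Decoder", "Encoder", "Layer", "Embeddings", "Attention")
    if name.startswith("__") or name.isupper():
        return True
    if name.endswith(internal_suffixes):
        return True
    if name.startswith("load_tf") or name.startswith("load_pytorch"):
        return True
    if name.startswith("is_") and name.endswith("_available"):
        return True
    return False

print(looks_undocumentable("BartEncoder"))  # True: internal building block
print(looks_undocumentable("BartModel"))    # False: must be documented
```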
def check_all_objects_are_documented():
    """ Check all objects in the main init are properly documented."""
    documented_objs = find_all_documented_objects()
    undocumented_objs = [c for c in dir(transformers) if c not in documented_objs and not ignore_undocumented(c)]
    if len(undocumented_objs) > 0:
        raise Exception(
            "The following objects are in the public init so should be documented:\n - "
            + "\n - ".join(undocumented_objs)
        )
def check_repo_quality():
    """ Check all models are properly tested and documented."""
    print("Checking all models are properly tested.")
    check_all_decorator_order()
    check_all_models_are_tested()
    print("Checking all objects are properly documented.")
    check_all_objects_are_documented()
    print("Checking all models are in at least one auto class.")
    check_all_models_are_auto_configured()
...