"src/git@developer.sourcefind.cn:chenpangpang/open-webui.git" did not exist on "1c4e7f03245f6994d66f0200fde1fdbaed2429d2"
Unverified commit 02a77fa0, authored by Yih-Dar, committed by GitHub

Cleanup more auto mapping names (#21909)



* fix auto 2

* fix auto 2

* fix task guide issue

* fix

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent b05e0bec
@@ -303,7 +303,6 @@ MODEL_WITH_LM_HEAD_MAPPING_NAMES = OrderedDict(
         ("mpnet", "MPNetForMaskedLM"),
         ("mvp", "MvpForConditionalGeneration"),
         ("nezha", "NezhaForMaskedLM"),
-        ("nllb", "M2M100ForConditionalGeneration"),
         ("nystromformer", "NystromformerForMaskedLM"),
         ("openai-gpt", "OpenAIGPTLMHeadModel"),
         ("pegasus_x", "PegasusXForConditionalGeneration"),
@@ -594,7 +593,6 @@ MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES = OrderedDict(
         ("mbart", "MBartForConditionalGeneration"),
         ("mt5", "MT5ForConditionalGeneration"),
         ("mvp", "MvpForConditionalGeneration"),
-        ("nllb", "M2M100ForConditionalGeneration"),
         ("pegasus", "PegasusForConditionalGeneration"),
         ("pegasus_x", "PegasusXForConditionalGeneration"),
         ("plbart", "PLBartForConditionalGeneration"),
@@ -938,7 +936,6 @@ MODEL_FOR_BACKBONE_MAPPING_NAMES = OrderedDict(
         ("bit", "BitBackbone"),
         ("convnext", "ConvNextBackbone"),
         ("dinat", "DinatBackbone"),
-        ("efficientnet", "EfficientNetBackbone"),
         ("maskformer-swin", "MaskFormerSwinBackbone"),
         ("nat", "NatBackbone"),
         ("resnet", "ResNetBackbone"),
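Both removed `nllb` entries and the `efficientnet` backbone entry were dead weight: NLLB checkpoints reuse the M2M100 architecture, so `nllb` has no config class of its own, and `EfficientNetBackbone` was never defined in the library. Entries like these trip the repo-consistency checks patched below. A minimal sketch of the two invariants involved, not the actual check code, assuming a transformers checkout from this era:

import transformers
from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES
from transformers.models.auto.modeling_auto import (
    MODEL_FOR_BACKBONE_MAPPING_NAMES,
    MODEL_WITH_LM_HEAD_MAPPING_NAMES,
)

# Illustrative asserts only; on a pre-commit checkout the deleted entries fail them.
for mapping in (MODEL_WITH_LM_HEAD_MAPPING_NAMES, MODEL_FOR_BACKBONE_MAPPING_NAMES):
    for model_type, class_name in mapping.items():
        # "nllb" broke this rule: it has no entry in CONFIG_MAPPING_NAMES
        assert model_type in CONFIG_MAPPING_NAMES, f"no config for {model_type}"
        # "EfficientNetBackbone" broke this one: the class does not exist
        assert hasattr(transformers, class_name), f"{class_name} not defined"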
@@ -26,9 +26,6 @@ from transformers.models.auto import get_values
 from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES
 from transformers.models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING_NAMES
 from transformers.models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING_NAMES
-from transformers.models.auto.modeling_auto import MODEL_MAPPING_NAMES
-from transformers.models.auto.modeling_flax_auto import FLAX_MODEL_MAPPING_NAMES
-from transformers.models.auto.modeling_tf_auto import TF_MODEL_MAPPING_NAMES
 from transformers.models.auto.processing_auto import PROCESSOR_MAPPING_NAMES
 from transformers.models.auto.tokenization_auto import TOKENIZER_MAPPING_NAMES
 from transformers.utils import ENV_VARS_TRUE_VALUES, direct_transformers_import
@@ -617,17 +614,21 @@ def check_all_auto_object_names_being_defined():
     """Check all names defined in auto (name) mappings exist in the library."""
     failures = []
-    mapping_to_check = {
+    mappings_to_check = {
         "TOKENIZER_MAPPING_NAMES": TOKENIZER_MAPPING_NAMES,
         "IMAGE_PROCESSOR_MAPPING_NAMES": IMAGE_PROCESSOR_MAPPING_NAMES,
         "FEATURE_EXTRACTOR_MAPPING_NAMES": FEATURE_EXTRACTOR_MAPPING_NAMES,
         "PROCESSOR_MAPPING_NAMES": PROCESSOR_MAPPING_NAMES,
-        "MODEL_MAPPING_NAMES": MODEL_MAPPING_NAMES,
-        "TF_MODEL_MAPPING_NAMES": TF_MODEL_MAPPING_NAMES,
-        "FLAX_MODEL_MAPPING_NAMES": FLAX_MODEL_MAPPING_NAMES,
     }
-    for name, mapping in mapping_to_check.items():
+    # Each auto modeling file contains multiple mappings. Let's collect them dynamically.
+    for module_name in ["modeling_auto", "modeling_tf_auto", "modeling_flax_auto"]:
+        module = getattr(transformers.models.auto, module_name)
+        # all mappings in a single auto modeling file
+        mapping_names = [x for x in dir(module) if x.endswith("_MAPPING_NAMES")]
+        mappings_to_check.update({name: getattr(module, name) for name in mapping_names})
+    for name, mapping in mappings_to_check.items():
         for model_type, class_names in mapping.items():
             if not isinstance(class_names, tuple):
                 class_names = (class_names,)
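The hard-coded dict only ever covered the seven mappings listed above; the new loop instead picks up every `*_MAPPING_NAMES` table that the three auto modeling files define. A standalone sketch of that collection step, assuming PyTorch, TensorFlow, and Flax are all installed so the three modules import:

import transformers.models.auto

mappings = {}
for module_name in ["modeling_auto", "modeling_tf_auto", "modeling_flax_auto"]:
    module = getattr(transformers.models.auto, module_name)
    # pick up every OrderedDict named *_MAPPING_NAMES in this module
    for attr in dir(module):
        if attr.endswith("_MAPPING_NAMES"):
            mappings[attr] = getattr(module, attr)

# far more mappings than the handful that were hard-coded before
print(len(mappings), sorted(mappings)[:3])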
@@ -652,16 +653,20 @@ def check_all_auto_mapping_names_in_config_mapping_names():
     failures = []
     # `TOKENIZER_PROCESSOR_MAPPING_NAMES` and `AutoTokenizer` are special and don't need to follow the rule.
-    mapping_to_check = {
+    mappings_to_check = {
         "IMAGE_PROCESSOR_MAPPING_NAMES": IMAGE_PROCESSOR_MAPPING_NAMES,
         "FEATURE_EXTRACTOR_MAPPING_NAMES": FEATURE_EXTRACTOR_MAPPING_NAMES,
         "PROCESSOR_MAPPING_NAMES": PROCESSOR_MAPPING_NAMES,
-        "MODEL_MAPPING_NAMES": MODEL_MAPPING_NAMES,
-        "TF_MODEL_MAPPING_NAMES": TF_MODEL_MAPPING_NAMES,
-        "FLAX_MODEL_MAPPING_NAMES": FLAX_MODEL_MAPPING_NAMES,
     }
-    for name, mapping in mapping_to_check.items():
+    # Each auto modeling file contains multiple mappings. Let's collect them dynamically.
+    for module_name in ["modeling_auto", "modeling_tf_auto", "modeling_flax_auto"]:
+        module = getattr(transformers.models.auto, module_name)
+        # all mappings in a single auto modeling file
+        mapping_names = [x for x in dir(module) if x.endswith("_MAPPING_NAMES")]
+        mappings_to_check.update({name: getattr(module, name) for name in mapping_names})
+    for name, mapping in mappings_to_check.items():
         for model_type, class_names in mapping.items():
             if model_type not in CONFIG_MAPPING_NAMES:
                 failures.append(
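A side effect of collecting only from the modeling files is that `TOKENIZER_MAPPING_NAMES` stays exempt from the config-name rule without an explicit exclusion: tokenizer-only model types such as `nllb` reuse another model's architecture and have no config entry. A quick probe of that exemption, hedged since the exact contents depend on the library version:

from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES
from transformers.models.auto.tokenization_auto import TOKENIZER_MAPPING_NAMES

# model types that ship a tokenizer but no config class of their own
tokenizer_only = [t for t in TOKENIZER_MAPPING_NAMES if t not in CONFIG_MAPPING_NAMES]
print(tokenizer_only)  # expected to include "nllb" on a checkout from this era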
@@ -72,14 +72,24 @@ TASK_GUIDE_TO_MODELS = {
     "document_question_answering.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES,
 }
 
+# This dict contains model types used in some task guides that are not in `CONFIG_MAPPING_NAMES` (and therefore not in
+# any `MODEL_MAPPING_NAMES` or any `MODEL_FOR_XXX_MAPPING_NAMES`).
+SPECIAL_TASK_GUIDE_TO_MODEL_TYPES = {
+    "summarization.mdx": ("nllb",),
+    "translation.mdx": ("nllb",),
+}
+
 
 def get_model_list_for_task(task_guide):
     """
     Return the list of models supporting the given task.
     """
-    config_maping_names = TASK_GUIDE_TO_MODELS[task_guide]
+    model_mapping_names = TASK_GUIDE_TO_MODELS[task_guide]
+    special_model_types = SPECIAL_TASK_GUIDE_TO_MODEL_TYPES.get(task_guide, set())
     model_names = {
-        code: name for code, name in transformers_module.MODEL_NAMES_MAPPING.items() if code in config_maping_names
+        code: name
+        for code, name in transformers_module.MODEL_NAMES_MAPPING.items()
+        if (code in model_mapping_names or code in special_model_types)
     }
     return ", ".join([f"[{name}](../model_doc/{code})" for code, name in model_names.items()]) + "\n"
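With the special-case table in place, NLLB reappears in the summarization and translation guides even though `nllb` is absent from every model mapping. A hypothetical run of the patched helper; the module name and the printed line are assumptions, not captured from a real run:

# Assumes this check script is importable from the current directory and that a
# transformers source checkout is present for `direct_transformers_import` to find.
from check_task_guides import get_model_list_for_task  # hypothetical module name

print(get_model_list_for_task("translation.mdx"))
# e.g. "..., [NLLB](../model_doc/nllb), ..., [T5](../model_doc/t5)\n"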