"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "9286f0ac3939a7081773fc66480f651a7d6a8404"
Unverified Commit 321ef388 authored by amyeroberts's avatar amyeroberts Committed by GitHub
Browse files

Include image processor in add-new-model-like (#20439)

parent 0bae286d
...@@ -62,6 +62,9 @@ class ModelPatterns: ...@@ -62,6 +62,9 @@ class ModelPatterns:
The tokenizer class associated with this model. Will default to `"{model_camel_cased}Config"`. The tokenizer class associated with this model. Will default to `"{model_camel_cased}Config"`.
tokenizer_class (`str`, *optional*): tokenizer_class (`str`, *optional*):
The tokenizer class associated with this model (leave to `None` for models that don't use a tokenizer). The tokenizer class associated with this model (leave to `None` for models that don't use a tokenizer).
image_processor_class (`str`, *optional*):
The image processor class associated with this model (leave to `None` for models that don't use an image
processor).
feature_extractor_class (`str`, *optional*): feature_extractor_class (`str`, *optional*):
The feature extractor class associated with this model (leave to `None` for models that don't use a feature The feature extractor class associated with this model (leave to `None` for models that don't use a feature
extractor). extractor).
...@@ -77,6 +80,7 @@ class ModelPatterns: ...@@ -77,6 +80,7 @@ class ModelPatterns:
model_upper_cased: Optional[str] = None model_upper_cased: Optional[str] = None
config_class: Optional[str] = None config_class: Optional[str] = None
tokenizer_class: Optional[str] = None tokenizer_class: Optional[str] = None
image_processor_class: Optional[str] = None
feature_extractor_class: Optional[str] = None feature_extractor_class: Optional[str] = None
processor_class: Optional[str] = None processor_class: Optional[str] = None
...@@ -101,6 +105,7 @@ class ModelPatterns: ...@@ -101,6 +105,7 @@ class ModelPatterns:
ATTRIBUTE_TO_PLACEHOLDER = { ATTRIBUTE_TO_PLACEHOLDER = {
"config_class": "[CONFIG_CLASS]", "config_class": "[CONFIG_CLASS]",
"tokenizer_class": "[TOKENIZER_CLASS]", "tokenizer_class": "[TOKENIZER_CLASS]",
"image_processor_class": "[IMAGE_PROCESSOR_CLASS]",
"feature_extractor_class": "[FEATURE_EXTRACTOR_CLASS]", "feature_extractor_class": "[FEATURE_EXTRACTOR_CLASS]",
"processor_class": "[PROCESSOR_CLASS]", "processor_class": "[PROCESSOR_CLASS]",
"checkpoint": "[CHECKPOINT]", "checkpoint": "[CHECKPOINT]",
...@@ -283,7 +288,7 @@ def replace_model_patterns( ...@@ -283,7 +288,7 @@ def replace_model_patterns(
# contains the camel-cased named, but will be treated before. # contains the camel-cased named, but will be treated before.
attributes_to_check = ["config_class"] attributes_to_check = ["config_class"]
# Add relevant preprocessing classes # Add relevant preprocessing classes
for attr in ["tokenizer_class", "feature_extractor_class", "processor_class"]: for attr in ["tokenizer_class", "image_processor_class", "feature_extractor_class", "processor_class"]:
if getattr(old_model_patterns, attr) is not None and getattr(new_model_patterns, attr) is not None: if getattr(old_model_patterns, attr) is not None and getattr(new_model_patterns, attr) is not None:
attributes_to_check.append(attr) attributes_to_check.append(attr)
...@@ -553,6 +558,7 @@ def get_model_files(model_type: str, frameworks: Optional[List[str]] = None) -> ...@@ -553,6 +558,7 @@ def get_model_files(model_type: str, frameworks: Optional[List[str]] = None) ->
f"test_modeling_tf_{module_name}.py", f"test_modeling_tf_{module_name}.py",
f"test_modeling_flax_{module_name}.py", f"test_modeling_flax_{module_name}.py",
f"test_tokenization_{module_name}.py", f"test_tokenization_{module_name}.py",
f"test_image_processing_{module_name}.py",
f"test_feature_extraction_{module_name}.py", f"test_feature_extraction_{module_name}.py",
f"test_processor_{module_name}.py", f"test_processor_{module_name}.py",
] ]
...@@ -687,6 +693,7 @@ def retrieve_info_for_model(model_type, frameworks: Optional[List[str]] = None): ...@@ -687,6 +693,7 @@ def retrieve_info_for_model(model_type, frameworks: Optional[List[str]] = None):
tokenizer_class = tokenizer_classes[0] if tokenizer_classes[0] is not None else tokenizer_classes[1] tokenizer_class = tokenizer_classes[0] if tokenizer_classes[0] is not None else tokenizer_classes[1]
else: else:
tokenizer_class = None tokenizer_class = None
image_processor_class = auto_module.image_processing_auto.IMAGE_PROCESSOR_MAPPING_NAMES.get(model_type, None)
feature_extractor_class = auto_module.feature_extraction_auto.FEATURE_EXTRACTOR_MAPPING_NAMES.get(model_type, None) feature_extractor_class = auto_module.feature_extraction_auto.FEATURE_EXTRACTOR_MAPPING_NAMES.get(model_type, None)
processor_class = auto_module.processing_auto.PROCESSOR_MAPPING_NAMES.get(model_type, None) processor_class = auto_module.processing_auto.PROCESSOR_MAPPING_NAMES.get(model_type, None)
...@@ -731,6 +738,7 @@ def retrieve_info_for_model(model_type, frameworks: Optional[List[str]] = None): ...@@ -731,6 +738,7 @@ def retrieve_info_for_model(model_type, frameworks: Optional[List[str]] = None):
model_upper_cased=model_upper_cased, model_upper_cased=model_upper_cased,
config_class=config_class, config_class=config_class,
tokenizer_class=tokenizer_class, tokenizer_class=tokenizer_class,
image_processor_class=image_processor_class,
feature_extractor_class=feature_extractor_class, feature_extractor_class=feature_extractor_class,
processor_class=processor_class, processor_class=processor_class,
) )
...@@ -748,14 +756,15 @@ def clean_frameworks_in_init( ...@@ -748,14 +756,15 @@ def clean_frameworks_in_init(
): ):
""" """
Removes all the import lines that don't belong to a given list of frameworks or concern tokenizers/feature Removes all the import lines that don't belong to a given list of frameworks or concern tokenizers/feature
extractors/processors in an init. extractors/image processors/processors in an init.
Args: Args:
init_file (`str` or `os.PathLike`): The path to the init to treat. init_file (`str` or `os.PathLike`): The path to the init to treat.
frameworks (`List[str]`, *optional*): frameworks (`List[str]`, *optional*):
If passed, this will remove all imports that are subject to a framework not in frameworks If passed, this will remove all imports that are subject to a framework not in frameworks
keep_processing (`bool`, *optional*, defaults to `True`): keep_processing (`bool`, *optional*, defaults to `True`):
Whether or not to keep the preprocessing (tokenizer, feature extractor, processor) imports in the init. Whether or not to keep the preprocessing (tokenizer, feature extractor, image processor, processor) imports
in the init.
""" """
if frameworks is None: if frameworks is None:
frameworks = get_default_frameworks() frameworks = get_default_frameworks()
...@@ -808,8 +817,9 @@ def clean_frameworks_in_init( ...@@ -808,8 +817,9 @@ def clean_frameworks_in_init(
idx += 1 idx += 1
# Otherwise we keep the line, except if it's a tokenizer import and we don't want to keep it. # Otherwise we keep the line, except if it's a tokenizer import and we don't want to keep it.
elif keep_processing or ( elif keep_processing or (
re.search('^\s*"(tokenization|processing|feature_extraction)', lines[idx]) is None re.search('^\s*"(tokenization|processing|feature_extraction|image_processing)', lines[idx]) is None
and re.search("^\s*from .(tokenization|processing|feature_extraction)", lines[idx]) is None and re.search("^\s*from .(tokenization|processing|feature_extraction|image_processing)", lines[idx])
is None
): ):
new_lines.append(lines[idx]) new_lines.append(lines[idx])
idx += 1 idx += 1
...@@ -885,6 +895,7 @@ def add_model_to_main_init( ...@@ -885,6 +895,7 @@ def add_model_to_main_init(
if not with_processing: if not with_processing:
processing_classes = [ processing_classes = [
old_model_patterns.tokenizer_class, old_model_patterns.tokenizer_class,
old_model_patterns.image_processor_class,
old_model_patterns.feature_extractor_class, old_model_patterns.feature_extractor_class,
old_model_patterns.processor_class, old_model_patterns.processor_class,
] ]
...@@ -962,6 +973,7 @@ AUTO_CLASSES_PATTERNS = { ...@@ -962,6 +973,7 @@ AUTO_CLASSES_PATTERNS = {
' ("{model_type}", "{pretrained_archive_map}"),', ' ("{model_type}", "{pretrained_archive_map}"),',
], ],
"feature_extraction_auto.py": [' ("{model_type}", "{feature_extractor_class}"),'], "feature_extraction_auto.py": [' ("{model_type}", "{feature_extractor_class}"),'],
"image_processing_auto.py": [' ("{model_type}", "{image_processor_class}"),'],
"modeling_auto.py": [' ("{model_type}", "{any_pt_class}"),'], "modeling_auto.py": [' ("{model_type}", "{any_pt_class}"),'],
"modeling_tf_auto.py": [' ("{model_type}", "{any_tf_class}"),'], "modeling_tf_auto.py": [' ("{model_type}", "{any_tf_class}"),'],
"modeling_flax_auto.py": [' ("{model_type}", "{any_flax_class}"),'], "modeling_flax_auto.py": [' ("{model_type}", "{any_flax_class}"),'],
...@@ -995,6 +1007,14 @@ def add_model_to_auto_classes( ...@@ -995,6 +1007,14 @@ def add_model_to_auto_classes(
) )
elif "{config_class}" in pattern: elif "{config_class}" in pattern:
new_patterns.append(pattern.replace("{config_class}", old_model_patterns.config_class)) new_patterns.append(pattern.replace("{config_class}", old_model_patterns.config_class))
elif "{image_processor_class}" in pattern:
if (
old_model_patterns.image_processor_class is not None
and new_model_patterns.image_processor_class is not None
):
new_patterns.append(
pattern.replace("{image_processor_class}", old_model_patterns.image_processor_class)
)
elif "{feature_extractor_class}" in pattern: elif "{feature_extractor_class}" in pattern:
if ( if (
old_model_patterns.feature_extractor_class is not None old_model_patterns.feature_extractor_class is not None
...@@ -1121,6 +1141,10 @@ def duplicate_doc_file( ...@@ -1121,6 +1141,10 @@ def duplicate_doc_file(
# We only add the tokenizer if necessary # We only add the tokenizer if necessary
if old_model_patterns.tokenizer_class != new_model_patterns.tokenizer_class: if old_model_patterns.tokenizer_class != new_model_patterns.tokenizer_class:
new_blocks.append(new_block) new_blocks.append(new_block)
elif "ImageProcessor" in block_class:
# We only add the image processor if necessary
if old_model_patterns.image_processor_class != new_model_patterns.image_processor_class:
new_blocks.append(new_block)
elif "FeatureExtractor" in block_class: elif "FeatureExtractor" in block_class:
# We only add the feature extractor if necessary # We only add the feature extractor if necessary
if old_model_patterns.feature_extractor_class != new_model_patterns.feature_extractor_class: if old_model_patterns.feature_extractor_class != new_model_patterns.feature_extractor_class:
...@@ -1182,7 +1206,7 @@ def create_new_model_like( ...@@ -1182,7 +1206,7 @@ def create_new_model_like(
) )
keep_old_processing = True keep_old_processing = True
for processing_attr in ["feature_extractor_class", "processor_class", "tokenizer_class"]: for processing_attr in ["image_processor_class", "feature_extractor_class", "processor_class", "tokenizer_class"]:
if getattr(old_model_patterns, processing_attr) != getattr(new_model_patterns, processing_attr): if getattr(old_model_patterns, processing_attr) != getattr(new_model_patterns, processing_attr):
keep_old_processing = False keep_old_processing = False
...@@ -1198,7 +1222,10 @@ def create_new_model_like( ...@@ -1198,7 +1222,10 @@ def create_new_model_like(
files_to_adapt = [ files_to_adapt = [
f f
for f in files_to_adapt for f in files_to_adapt
if "tokenization" not in str(f) and "processing" not in str(f) and "feature_extraction" not in str(f) if "tokenization" not in str(f)
and "processing" not in str(f)
and "feature_extraction" not in str(f)
and "image_processing" not in str(f)
] ]
os.makedirs(module_folder, exist_ok=True) os.makedirs(module_folder, exist_ok=True)
...@@ -1236,7 +1263,10 @@ def create_new_model_like( ...@@ -1236,7 +1263,10 @@ def create_new_model_like(
files_to_adapt = [ files_to_adapt = [
f f
for f in files_to_adapt for f in files_to_adapt
if "tokenization" not in str(f) and "processor" not in str(f) and "feature_extraction" not in str(f) if "tokenization" not in str(f)
and "processor" not in str(f)
and "feature_extraction" not in str(f)
and "image_processing" not in str(f)
] ]
def disable_fx_test(filename: Path) -> bool: def disable_fx_test(filename: Path) -> bool:
...@@ -1458,6 +1488,7 @@ def get_user_input(): ...@@ -1458,6 +1488,7 @@ def get_user_input():
old_model_info = retrieve_info_for_model(old_model_type) old_model_info = retrieve_info_for_model(old_model_type)
old_tokenizer_class = old_model_info["model_patterns"].tokenizer_class old_tokenizer_class = old_model_info["model_patterns"].tokenizer_class
old_image_processor_class = old_model_info["model_patterns"].image_processor_class
old_feature_extractor_class = old_model_info["model_patterns"].feature_extractor_class old_feature_extractor_class = old_model_info["model_patterns"].feature_extractor_class
old_processor_class = old_model_info["model_patterns"].processor_class old_processor_class = old_model_info["model_patterns"].processor_class
old_frameworks = old_model_info["frameworks"] old_frameworks = old_model_info["frameworks"]
...@@ -1497,7 +1528,9 @@ def get_user_input(): ...@@ -1497,7 +1528,9 @@ def get_user_input():
) )
old_processing_classes = [ old_processing_classes = [
c for c in [old_feature_extractor_class, old_tokenizer_class, old_processor_class] if c is not None c
for c in [old_image_processor_class, old_feature_extractor_class, old_tokenizer_class, old_processor_class]
if c is not None
] ]
old_processing_classes = ", ".join(old_processing_classes) old_processing_classes = ", ".join(old_processing_classes)
keep_processing = get_user_field( keep_processing = get_user_field(
...@@ -1506,6 +1539,7 @@ def get_user_input(): ...@@ -1506,6 +1539,7 @@ def get_user_input():
fallback_message="Please answer yes/no, y/n, true/false or 1/0. ", fallback_message="Please answer yes/no, y/n, true/false or 1/0. ",
) )
if keep_processing: if keep_processing:
image_processor_class = old_image_processor_class
feature_extractor_class = old_feature_extractor_class feature_extractor_class = old_feature_extractor_class
processor_class = old_processor_class processor_class = old_processor_class
tokenizer_class = old_tokenizer_class tokenizer_class = old_tokenizer_class
...@@ -1517,6 +1551,11 @@ def get_user_input(): ...@@ -1517,6 +1551,11 @@ def get_user_input():
) )
else: else:
tokenizer_class = None tokenizer_class = None
if old_image_processor_class is not None:
image_processor_class = get_user_field(
"What will be the name of the image processor class for this model? ",
default_value=f"{model_camel_cased}ImageProcessor",
)
if old_feature_extractor_class is not None: if old_feature_extractor_class is not None:
feature_extractor_class = get_user_field( feature_extractor_class = get_user_field(
"What will be the name of the feature extractor class for this model? ", "What will be the name of the feature extractor class for this model? ",
...@@ -1541,6 +1580,7 @@ def get_user_input(): ...@@ -1541,6 +1580,7 @@ def get_user_input():
model_upper_cased=model_upper_cased, model_upper_cased=model_upper_cased,
config_class=config_class, config_class=config_class,
tokenizer_class=tokenizer_class, tokenizer_class=tokenizer_class,
image_processor_class=image_processor_class,
feature_extractor_class=feature_extractor_class, feature_extractor_class=feature_extractor_class,
processor_class=processor_class, processor_class=processor_class,
) )
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment