Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
321ef388
Unverified
Commit
321ef388
authored
Nov 28, 2022
by
amyeroberts
Committed by
GitHub
Nov 28, 2022
Browse files
Include image processor in add-new-model-like (#20439)
parent
0bae286d
Changes
2
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
377 additions
and
131 deletions
+377
-131
src/transformers/commands/add_new_model_like.py
src/transformers/commands/add_new_model_like.py
+49
-9
tests/utils/test_add_new_model_like.py
tests/utils/test_add_new_model_like.py
+328
-122
No files found.
src/transformers/commands/add_new_model_like.py
View file @
321ef388
...
...
@@ -62,6 +62,9 @@ class ModelPatterns:
The tokenizer class associated with this model. Will default to `"{model_camel_cased}Config"`.
tokenizer_class (`str`, *optional*):
The tokenizer class associated with this model (leave to `None` for models that don't use a tokenizer).
image_processor_class (`str`, *optional*):
The image processor class associated with this model (leave to `None` for models that don't use an image
processor).
feature_extractor_class (`str`, *optional*):
The feature extractor class associated with this model (leave to `None` for models that don't use a feature
extractor).
...
...
@@ -77,6 +80,7 @@ class ModelPatterns:
model_upper_cased
:
Optional
[
str
]
=
None
config_class
:
Optional
[
str
]
=
None
tokenizer_class
:
Optional
[
str
]
=
None
image_processor_class
:
Optional
[
str
]
=
None
feature_extractor_class
:
Optional
[
str
]
=
None
processor_class
:
Optional
[
str
]
=
None
...
...
@@ -101,6 +105,7 @@ class ModelPatterns:
ATTRIBUTE_TO_PLACEHOLDER
=
{
"config_class"
:
"[CONFIG_CLASS]"
,
"tokenizer_class"
:
"[TOKENIZER_CLASS]"
,
"image_processor_class"
:
"[IMAGE_PROCESSOR_CLASS]"
,
"feature_extractor_class"
:
"[FEATURE_EXTRACTOR_CLASS]"
,
"processor_class"
:
"[PROCESSOR_CLASS]"
,
"checkpoint"
:
"[CHECKPOINT]"
,
...
...
@@ -283,7 +288,7 @@ def replace_model_patterns(
# contains the camel-cased named, but will be treated before.
attributes_to_check
=
[
"config_class"
]
# Add relevant preprocessing classes
for
attr
in
[
"tokenizer_class"
,
"feature_extractor_class"
,
"processor_class"
]:
for
attr
in
[
"tokenizer_class"
,
"image_processor_class"
,
"feature_extractor_class"
,
"processor_class"
]:
if
getattr
(
old_model_patterns
,
attr
)
is
not
None
and
getattr
(
new_model_patterns
,
attr
)
is
not
None
:
attributes_to_check
.
append
(
attr
)
...
...
@@ -553,6 +558,7 @@ def get_model_files(model_type: str, frameworks: Optional[List[str]] = None) ->
f
"test_modeling_tf_
{
module_name
}
.py"
,
f
"test_modeling_flax_
{
module_name
}
.py"
,
f
"test_tokenization_
{
module_name
}
.py"
,
f
"test_image_processing_
{
module_name
}
.py"
,
f
"test_feature_extraction_
{
module_name
}
.py"
,
f
"test_processor_
{
module_name
}
.py"
,
]
...
...
@@ -687,6 +693,7 @@ def retrieve_info_for_model(model_type, frameworks: Optional[List[str]] = None):
tokenizer_class
=
tokenizer_classes
[
0
]
if
tokenizer_classes
[
0
]
is
not
None
else
tokenizer_classes
[
1
]
else
:
tokenizer_class
=
None
image_processor_class
=
auto_module
.
image_processing_auto
.
IMAGE_PROCESSOR_MAPPING_NAMES
.
get
(
model_type
,
None
)
feature_extractor_class
=
auto_module
.
feature_extraction_auto
.
FEATURE_EXTRACTOR_MAPPING_NAMES
.
get
(
model_type
,
None
)
processor_class
=
auto_module
.
processing_auto
.
PROCESSOR_MAPPING_NAMES
.
get
(
model_type
,
None
)
...
...
@@ -731,6 +738,7 @@ def retrieve_info_for_model(model_type, frameworks: Optional[List[str]] = None):
model_upper_cased
=
model_upper_cased
,
config_class
=
config_class
,
tokenizer_class
=
tokenizer_class
,
image_processor_class
=
image_processor_class
,
feature_extractor_class
=
feature_extractor_class
,
processor_class
=
processor_class
,
)
...
...
@@ -748,14 +756,15 @@ def clean_frameworks_in_init(
):
"""
Removes all the import lines that don't belong to a given list of frameworks or concern tokenizers/feature
extractors/processors in an init.
extractors/
image processors/
processors in an init.
Args:
init_file (`str` or `os.PathLike`): The path to the init to treat.
frameworks (`List[str]`, *optional*):
If passed, this will remove all imports that are subject to a framework not in frameworks
keep_processing (`bool`, *optional*, defaults to `True`):
Whether or not to keep the preprocessing (tokenizer, feature extractor, processor) imports in the init.
Whether or not to keep the preprocessing (tokenizer, feature extractor, image processor, processor) imports
in the init.
"""
if
frameworks
is
None
:
frameworks
=
get_default_frameworks
()
...
...
@@ -808,8 +817,9 @@ def clean_frameworks_in_init(
idx
+=
1
# Otherwise we keep the line, except if it's a tokenizer import and we don't want to keep it.
elif
keep_processing
or
(
re
.
search
(
'^\s*"(tokenization|processing|feature_extraction)'
,
lines
[
idx
])
is
None
and
re
.
search
(
"^\s*from .(tokenization|processing|feature_extraction)"
,
lines
[
idx
])
is
None
re
.
search
(
'^\s*"(tokenization|processing|feature_extraction|image_processing)'
,
lines
[
idx
])
is
None
and
re
.
search
(
"^\s*from .(tokenization|processing|feature_extraction|image_processing)"
,
lines
[
idx
])
is
None
):
new_lines
.
append
(
lines
[
idx
])
idx
+=
1
...
...
@@ -885,6 +895,7 @@ def add_model_to_main_init(
if
not
with_processing
:
processing_classes
=
[
old_model_patterns
.
tokenizer_class
,
old_model_patterns
.
image_processor_class
,
old_model_patterns
.
feature_extractor_class
,
old_model_patterns
.
processor_class
,
]
...
...
@@ -962,6 +973,7 @@ AUTO_CLASSES_PATTERNS = {
' ("{model_type}", "{pretrained_archive_map}"),'
,
],
"feature_extraction_auto.py"
:
[
' ("{model_type}", "{feature_extractor_class}"),'
],
"image_processing_auto.py"
:
[
' ("{model_type}", "{image_processor_class}"),'
],
"modeling_auto.py"
:
[
' ("{model_type}", "{any_pt_class}"),'
],
"modeling_tf_auto.py"
:
[
' ("{model_type}", "{any_tf_class}"),'
],
"modeling_flax_auto.py"
:
[
' ("{model_type}", "{any_flax_class}"),'
],
...
...
@@ -995,6 +1007,14 @@ def add_model_to_auto_classes(
)
elif
"{config_class}"
in
pattern
:
new_patterns
.
append
(
pattern
.
replace
(
"{config_class}"
,
old_model_patterns
.
config_class
))
elif
"{image_processor_class}"
in
pattern
:
if
(
old_model_patterns
.
image_processor_class
is
not
None
and
new_model_patterns
.
image_processor_class
is
not
None
):
new_patterns
.
append
(
pattern
.
replace
(
"{image_processor_class}"
,
old_model_patterns
.
image_processor_class
)
)
elif
"{feature_extractor_class}"
in
pattern
:
if
(
old_model_patterns
.
feature_extractor_class
is
not
None
...
...
@@ -1121,6 +1141,10 @@ def duplicate_doc_file(
# We only add the tokenizer if necessary
if
old_model_patterns
.
tokenizer_class
!=
new_model_patterns
.
tokenizer_class
:
new_blocks
.
append
(
new_block
)
elif
"ImageProcessor"
in
block_class
:
# We only add the image processor if necessary
if
old_model_patterns
.
image_processor_class
!=
new_model_patterns
.
image_processor_class
:
new_blocks
.
append
(
new_block
)
elif
"FeatureExtractor"
in
block_class
:
# We only add the feature extractor if necessary
if
old_model_patterns
.
feature_extractor_class
!=
new_model_patterns
.
feature_extractor_class
:
...
...
@@ -1182,7 +1206,7 @@ def create_new_model_like(
)
keep_old_processing
=
True
for
processing_attr
in
[
"feature_extractor_class"
,
"processor_class"
,
"tokenizer_class"
]:
for
processing_attr
in
[
"image_processor_class"
,
"feature_extractor_class"
,
"processor_class"
,
"tokenizer_class"
]:
if
getattr
(
old_model_patterns
,
processing_attr
)
!=
getattr
(
new_model_patterns
,
processing_attr
):
keep_old_processing
=
False
...
...
@@ -1198,7 +1222,10 @@ def create_new_model_like(
files_to_adapt
=
[
f
for
f
in
files_to_adapt
if
"tokenization"
not
in
str
(
f
)
and
"processing"
not
in
str
(
f
)
and
"feature_extraction"
not
in
str
(
f
)
if
"tokenization"
not
in
str
(
f
)
and
"processing"
not
in
str
(
f
)
and
"feature_extraction"
not
in
str
(
f
)
and
"image_processing"
not
in
str
(
f
)
]
os
.
makedirs
(
module_folder
,
exist_ok
=
True
)
...
...
@@ -1236,7 +1263,10 @@ def create_new_model_like(
files_to_adapt
=
[
f
for
f
in
files_to_adapt
if
"tokenization"
not
in
str
(
f
)
and
"processor"
not
in
str
(
f
)
and
"feature_extraction"
not
in
str
(
f
)
if
"tokenization"
not
in
str
(
f
)
and
"processor"
not
in
str
(
f
)
and
"feature_extraction"
not
in
str
(
f
)
and
"image_processing"
not
in
str
(
f
)
]
def
disable_fx_test
(
filename
:
Path
)
->
bool
:
...
...
@@ -1458,6 +1488,7 @@ def get_user_input():
old_model_info
=
retrieve_info_for_model
(
old_model_type
)
old_tokenizer_class
=
old_model_info
[
"model_patterns"
].
tokenizer_class
old_image_processor_class
=
old_model_info
[
"model_patterns"
].
image_processor_class
old_feature_extractor_class
=
old_model_info
[
"model_patterns"
].
feature_extractor_class
old_processor_class
=
old_model_info
[
"model_patterns"
].
processor_class
old_frameworks
=
old_model_info
[
"frameworks"
]
...
...
@@ -1497,7 +1528,9 @@ def get_user_input():
)
old_processing_classes
=
[
c
for
c
in
[
old_feature_extractor_class
,
old_tokenizer_class
,
old_processor_class
]
if
c
is
not
None
c
for
c
in
[
old_image_processor_class
,
old_feature_extractor_class
,
old_tokenizer_class
,
old_processor_class
]
if
c
is
not
None
]
old_processing_classes
=
", "
.
join
(
old_processing_classes
)
keep_processing
=
get_user_field
(
...
...
@@ -1506,6 +1539,7 @@ def get_user_input():
fallback_message
=
"Please answer yes/no, y/n, true/false or 1/0. "
,
)
if
keep_processing
:
image_processor_class
=
old_image_processor_class
feature_extractor_class
=
old_feature_extractor_class
processor_class
=
old_processor_class
tokenizer_class
=
old_tokenizer_class
...
...
@@ -1517,6 +1551,11 @@ def get_user_input():
)
else
:
tokenizer_class
=
None
if
old_image_processor_class
is
not
None
:
image_processor_class
=
get_user_field
(
"What will be the name of the image processor class for this model? "
,
default_value
=
f
"
{
model_camel_cased
}
ImageProcessor"
,
)
if
old_feature_extractor_class
is
not
None
:
feature_extractor_class
=
get_user_field
(
"What will be the name of the feature extractor class for this model? "
,
...
...
@@ -1541,6 +1580,7 @@ def get_user_input():
model_upper_cased
=
model_upper_cased
,
config_class
=
config_class
,
tokenizer_class
=
tokenizer_class
,
image_processor_class
=
image_processor_class
,
feature_extractor_class
=
feature_extractor_class
,
processor_class
=
processor_class
,
)
...
...
tests/utils/test_add_new_model_like.py
View file @
321ef388
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment