"test/vscode:/vscode.git/clone" did not exist on "ffde465d68961f8f24fe47a81968283d82dc943c"
Unverified commit 4eb918e6 authored by amyeroberts, committed by GitHub

AutoImageProcessor (#20111)

* AutoImageProcessor skeleton

* Update references

* Add mapping in init

* Add model image processors to __init__ for importing

* Add AutoImageProcessor tests

* Fix up

* Image Processor documentation

* Remove pdb

* Update docs/source/en/model_doc/mobilevit.mdx

* Update docs

* Don't add whitespace on json files

* Remove fixtures

* Move checking model config down

* Fix up

* Add check for image processor

* Remove FeatureExtractorMixin in docstrings

* Rename model_tmpfile to config_tmpfile

* Don't make None if not in image processor map
parent c08a1e26
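For orientation before the diff: this commit adds an AutoImageProcessor auto class that resolves the correct image processor from a checkpoint's preprocessor_config.json (falling back to the legacy feature_extractor_type key, as the tests further down exercise). A minimal usage sketch, using the same checkpoint as the shortcut test below:

from transformers import AutoImageProcessor

# Reads preprocessor_config.json and instantiates the matching class;
# for this checkpoint that is CLIPImageProcessor.
image_processor = AutoImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
print(type(image_processor).__name__)  # CLIPImageProcessor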
......@@ -29,6 +29,7 @@ except OptionalDependencyNotAvailable:
pass
else:
_import_structure["feature_extraction_levit"] = ["LevitFeatureExtractor"]
_import_structure["image_processing_levit"] = ["LevitImageProcessor"]
try:
if not is_torch_available():
......@@ -55,6 +56,7 @@ if TYPE_CHECKING:
pass
else:
from .feature_extraction_levit import LevitFeatureExtractor
from .image_processing_levit import LevitImageProcessor
try:
if not is_torch_available():
......
......@@ -37,6 +37,7 @@ except OptionalDependencyNotAvailable:
pass
else:
_import_structure["feature_extraction_mobilevit"] = ["MobileViTFeatureExtractor"]
_import_structure["image_processing_mobilevit"] = ["MobileViTImageProcessor"]
try:
if not is_torch_available():
......@@ -76,6 +77,7 @@ if TYPE_CHECKING:
pass
else:
from .feature_extraction_mobilevit import MobileViTFeatureExtractor
from .image_processing_mobilevit import MobileViTImageProcessor
try:
if not is_torch_available():
......@@ -91,14 +93,6 @@ if TYPE_CHECKING:
MobileViTPreTrainedModel,
)
try:
if not is_vision_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
from .feature_extraction_mobilevit import MobileViTFeatureExtractor
try:
if not is_tf_available():
raise OptionalDependencyNotAvailable()
......
......@@ -38,6 +38,7 @@ except OptionalDependencyNotAvailable:
pass
else:
_import_structure["feature_extraction_perceiver"] = ["PerceiverFeatureExtractor"]
_import_structure["image_processing_perceiver"] = ["PerceiverImageProcessor"]
try:
if not is_torch_available():
......@@ -71,6 +72,7 @@ if TYPE_CHECKING:
pass
else:
from .feature_extraction_perceiver import PerceiverFeatureExtractor
from .image_processing_perceiver import PerceiverImageProcessor
try:
if not is_torch_available():
......
......@@ -30,6 +30,7 @@ except OptionalDependencyNotAvailable:
pass
else:
_import_structure["feature_extraction_poolformer"] = ["PoolFormerFeatureExtractor"]
_import_structure["image_processing_poolformer"] = ["PoolFormerImageProcessor"]
try:
if not is_torch_available():
......@@ -55,6 +56,7 @@ if TYPE_CHECKING:
pass
else:
from .feature_extraction_poolformer import PoolFormerFeatureExtractor
from .image_processing_poolformer import PoolFormerImageProcessor
try:
if not is_torch_available():
......
......@@ -37,6 +37,7 @@ except OptionalDependencyNotAvailable:
pass
else:
_import_structure["feature_extraction_segformer"] = ["SegformerFeatureExtractor"]
_import_structure["image_processing_segformer"] = ["SegformerImageProcessor"]
try:
if not is_torch_available():
......@@ -80,6 +81,7 @@ if TYPE_CHECKING:
pass
else:
from .feature_extraction_segformer import SegformerFeatureExtractor
from .image_processing_segformer import SegformerImageProcessor
try:
if not is_torch_available():
......
......@@ -45,6 +45,7 @@ except OptionalDependencyNotAvailable:
pass
else:
_import_structure["feature_extraction_videomae"] = ["VideoMAEFeatureExtractor"]
_import_structure["image_processing_videomae"] = ["VideoMAEImageProcessor"]
if TYPE_CHECKING:
from .configuration_videomae import VIDEOMAE_PRETRAINED_CONFIG_ARCHIVE_MAP, VideoMAEConfig
......@@ -70,6 +71,7 @@ if TYPE_CHECKING:
pass
else:
from .feature_extraction_videomae import VideoMAEFeatureExtractor
from .image_processing_videomae import VideoMAEImageProcessor
else:
import sys
......
......@@ -30,6 +30,7 @@ except OptionalDependencyNotAvailable:
pass
else:
_import_structure["feature_extraction_vilt"] = ["ViltFeatureExtractor"]
_import_structure["image_processing_vilt"] = ["ViltImageProcessor"]
_import_structure["processing_vilt"] = ["ViltProcessor"]
try:
......@@ -61,6 +62,7 @@ if TYPE_CHECKING:
pass
else:
from .feature_extraction_vilt import ViltFeatureExtractor
from .image_processing_vilt import ViltImageProcessor
from .processing_vilt import ViltProcessor
try:
......
......@@ -36,6 +36,7 @@ except OptionalDependencyNotAvailable:
pass
else:
_import_structure["feature_extraction_vit"] = ["ViTFeatureExtractor"]
_import_structure["image_processing_vit"] = ["ViTImageProcessor"]
try:
if not is_torch_available():
......@@ -85,6 +86,7 @@ if TYPE_CHECKING:
pass
else:
from .feature_extraction_vit import ViTFeatureExtractor
from .image_processing_vit import ViTImageProcessor
try:
if not is_torch_available():
......
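All of the per-model __init__.py hunks above follow the same shape: the new image_processing_<model> module is added to _import_structure behind an is_vision_available() guard, and imported directly only under TYPE_CHECKING so type checkers see the symbols. A schematic sketch of that layout for Levit (abbreviated; the exact _LazyModule arguments at the bottom are recalled from the library's convention and may differ in detail):

from typing import TYPE_CHECKING

from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_vision_available

_import_structure = {"configuration_levit": ["LevitConfig"]}  # abbreviated

try:
    if not is_vision_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    pass
else:
    _import_structure["feature_extraction_levit"] = ["LevitFeatureExtractor"]
    _import_structure["image_processing_levit"] = ["LevitImageProcessor"]  # added by this commit

if TYPE_CHECKING:
    from .configuration_levit import LevitConfig  # abbreviated

    try:
        if not is_vision_available():
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        pass
    else:
        from .feature_extraction_levit import LevitFeatureExtractor
        from .image_processing_levit import LevitImageProcessor  # added by this commit
else:
    import sys

    # Replace this module with a lazy proxy so heavy submodules are imported only on first use.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)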
......@@ -3,7 +3,7 @@
from ..utils import DummyObject, requires_backends
class ImageProcessorMixin(metaclass=DummyObject):
class ImageProcessingMixin(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
......@@ -36,6 +36,13 @@ class BeitFeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class BeitImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class CLIPFeatureExtractor(metaclass=DummyObject):
_backends = ["vision"]
......@@ -43,6 +50,13 @@ class CLIPFeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class CLIPImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class ConditionalDetrFeatureExtractor(metaclass=DummyObject):
_backends = ["vision"]
......@@ -57,6 +71,13 @@ class ConvNextFeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class ConvNextImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class DeformableDetrFeatureExtractor(metaclass=DummyObject):
_backends = ["vision"]
......@@ -71,6 +92,13 @@ class DeiTFeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class DeiTImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class DetrFeatureExtractor(metaclass=DummyObject):
_backends = ["vision"]
......@@ -92,6 +120,13 @@ class DPTFeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class DPTImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class FlavaFeatureExtractor(metaclass=DummyObject):
_backends = ["vision"]
......@@ -99,6 +134,13 @@ class FlavaFeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class FlavaImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class FlavaProcessor(metaclass=DummyObject):
_backends = ["vision"]
......@@ -113,6 +155,13 @@ class GLPNFeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class GLPNImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class ImageGPTFeatureExtractor(metaclass=DummyObject):
_backends = ["vision"]
......@@ -120,6 +169,13 @@ class ImageGPTFeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class ImageGPTImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class LayoutLMv2FeatureExtractor(metaclass=DummyObject):
_backends = ["vision"]
......@@ -127,6 +183,13 @@ class LayoutLMv2FeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class LayoutLMv2ImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class LayoutLMv3FeatureExtractor(metaclass=DummyObject):
_backends = ["vision"]
......@@ -134,6 +197,13 @@ class LayoutLMv3FeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class LayoutLMv3ImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class LevitFeatureExtractor(metaclass=DummyObject):
_backends = ["vision"]
......@@ -141,6 +211,13 @@ class LevitFeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class LevitImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class MaskFormerFeatureExtractor(metaclass=DummyObject):
_backends = ["vision"]
......@@ -155,6 +232,13 @@ class MobileViTFeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class MobileViTImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class OwlViTFeatureExtractor(metaclass=DummyObject):
_backends = ["vision"]
......@@ -169,6 +253,13 @@ class PerceiverFeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class PerceiverImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class PoolFormerFeatureExtractor(metaclass=DummyObject):
_backends = ["vision"]
......@@ -176,6 +267,13 @@ class PoolFormerFeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class PoolFormerImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class SegformerFeatureExtractor(metaclass=DummyObject):
_backends = ["vision"]
......@@ -183,6 +281,13 @@ class SegformerFeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class SegformerImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class VideoMAEFeatureExtractor(metaclass=DummyObject):
_backends = ["vision"]
......@@ -190,6 +295,13 @@ class VideoMAEFeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class VideoMAEImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class ViltFeatureExtractor(metaclass=DummyObject):
_backends = ["vision"]
......@@ -197,6 +309,13 @@ class ViltFeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class ViltImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class ViltProcessor(metaclass=DummyObject):
_backends = ["vision"]
......@@ -211,6 +330,13 @@ class ViTFeatureExtractor(metaclass=DummyObject):
requires_backends(self, ["vision"])
class ViTImageProcessor(metaclass=DummyObject):
_backends = ["vision"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
class YolosFeatureExtractor(metaclass=DummyObject):
_backends = ["vision"]
......
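The dummy_vision_objects.py additions above give every new image processor a vision-gated placeholder: importing the name from transformers always succeeds, but using it without the vision extras raises a descriptive error. A self-contained sketch of the mechanism (the real DummyObject and requires_backends live in transformers.utils and additionally check whether the backend is actually importable; this simplified version always raises):

def requires_backends(obj, backends):
    # Simplified: the real helper raises only when the listed backends are missing.
    name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__
    raise ImportError(
        f"{name} requires the following backends: {', '.join(backends)}. "
        "Install them, e.g. `pip install transformers[vision]`."
    )


class DummyObject(type):
    """Metaclass for placeholder classes: any non-underscore attribute access or
    instantiation funnels into requires_backends, which raises a helpful ImportError."""

    def __getattribute__(cls, key):
        if key.startswith("_"):
            return super().__getattribute__(key)
        requires_backends(cls, cls._backends)


class LevitImageProcessor(metaclass=DummyObject):
    _backends = ["vision"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["vision"])


# LevitImageProcessor() or LevitImageProcessor.from_pretrained(...) now raise an ImportError
# pointing at the missing vision dependency, instead of failing at import time.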
# coding=utf-8
# Copyright 2021 the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import sys
import tempfile
import unittest
from pathlib import Path
from transformers import (
    CONFIG_MAPPING,
    IMAGE_PROCESSOR_MAPPING,
    AutoConfig,
    AutoImageProcessor,
    CLIPConfig,
    CLIPImageProcessor,
)
from transformers.testing_utils import DUMMY_UNKNOWN_IDENTIFIER

sys.path.append(str(Path(__file__).parent.parent.parent.parent / "utils"))

from test_module.custom_configuration import CustomConfig  # noqa E402
from test_module.custom_image_processing import CustomImageProcessor  # noqa E402


class AutoImageProcessorTest(unittest.TestCase):
    def test_image_processor_from_model_shortcut(self):
        config = AutoImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
        self.assertIsInstance(config, CLIPImageProcessor)

    def test_image_processor_from_local_directory_from_key(self):
        with tempfile.TemporaryDirectory() as tmpdirname:
            processor_tmpfile = Path(tmpdirname) / "preprocessor_config.json"
            config_tmpfile = Path(tmpdirname) / "config.json"
            json.dump(
                {"image_processor_type": "CLIPImageProcessor", "processor_class": "CLIPProcessor"},
                open(processor_tmpfile, "w"),
            )
            json.dump({"model_type": "clip"}, open(config_tmpfile, "w"))

            config = AutoImageProcessor.from_pretrained(tmpdirname)
            self.assertIsInstance(config, CLIPImageProcessor)

    def test_image_processor_from_local_directory_from_feature_extractor_key(self):
        # Ensure we can load the image processor from the feature extractor config
        with tempfile.TemporaryDirectory() as tmpdirname:
            processor_tmpfile = Path(tmpdirname) / "preprocessor_config.json"
            config_tmpfile = Path(tmpdirname) / "config.json"
            json.dump(
                {"feature_extractor_type": "CLIPFeatureExtractor", "processor_class": "CLIPProcessor"},
                open(processor_tmpfile, "w"),
            )
            json.dump({"model_type": "clip"}, open(config_tmpfile, "w"))

            config = AutoImageProcessor.from_pretrained(tmpdirname)
            self.assertIsInstance(config, CLIPImageProcessor)

    def test_image_processor_from_local_directory_from_config(self):
        with tempfile.TemporaryDirectory() as tmpdirname:
            model_config = CLIPConfig()

            # Create a dummy config file with image_processor_type
            processor_tmpfile = Path(tmpdirname) / "preprocessor_config.json"
            config_tmpfile = Path(tmpdirname) / "config.json"
            json.dump(
                {"image_processor_type": "CLIPImageProcessor", "processor_class": "CLIPProcessor"},
                open(processor_tmpfile, "w"),
            )
            json.dump({"model_type": "clip"}, open(config_tmpfile, "w"))

            # remove image_processor_type to make sure config.json alone is enough to load image processor locally
            config_dict = AutoImageProcessor.from_pretrained(tmpdirname).to_dict()
            config_dict.pop("image_processor_type")
            config = CLIPImageProcessor(**config_dict)

            # save in new folder
            model_config.save_pretrained(tmpdirname)
            config.save_pretrained(tmpdirname)

            config = AutoImageProcessor.from_pretrained(tmpdirname)

            # make sure private variable is not incorrectly saved
            dict_as_saved = json.loads(config.to_json_string())
            self.assertTrue("_processor_class" not in dict_as_saved)

            self.assertIsInstance(config, CLIPImageProcessor)

    def test_image_processor_from_local_file(self):
        with tempfile.TemporaryDirectory() as tmpdirname:
            processor_tmpfile = Path(tmpdirname) / "preprocessor_config.json"
            json.dump(
                {"image_processor_type": "CLIPImageProcessor", "processor_class": "CLIPProcessor"},
                open(processor_tmpfile, "w"),
            )

            config = AutoImageProcessor.from_pretrained(processor_tmpfile)
            self.assertIsInstance(config, CLIPImageProcessor)

    def test_repo_not_found(self):
        with self.assertRaisesRegex(
            EnvironmentError, "clip-base is not a local folder and is not a valid model identifier"
        ):
            _ = AutoImageProcessor.from_pretrained("clip-base")

    def test_revision_not_found(self):
        with self.assertRaisesRegex(
            EnvironmentError, r"aaaaaa is not a valid git identifier \(branch name, tag name or commit id\)"
        ):
            _ = AutoImageProcessor.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER, revision="aaaaaa")

    def test_image_processor_not_found(self):
        with self.assertRaisesRegex(
            EnvironmentError,
            "hf-internal-testing/config-no-model does not appear to have a file named preprocessor_config.json.",
        ):
            _ = AutoImageProcessor.from_pretrained("hf-internal-testing/config-no-model")

    def test_from_pretrained_dynamic_image_processor(self):
        model = AutoImageProcessor.from_pretrained(
            "hf-internal-testing/test_dynamic_image_processor", trust_remote_code=True
        )
        self.assertEqual(model.__class__.__name__, "NewImageProcessor")

    def test_new_image_processor_registration(self):
        try:
            AutoConfig.register("custom", CustomConfig)
            AutoImageProcessor.register(CustomConfig, CustomImageProcessor)
            # Trying to register something existing in the Transformers library will raise an error
            with self.assertRaises(ValueError):
                AutoImageProcessor.register(CLIPConfig, CLIPImageProcessor)

            with tempfile.TemporaryDirectory() as tmpdirname:
                processor_tmpfile = Path(tmpdirname) / "preprocessor_config.json"
                config_tmpfile = Path(tmpdirname) / "config.json"
                json.dump(
                    {"feature_extractor_type": "CLIPFeatureExtractor", "processor_class": "CLIPProcessor"},
                    open(processor_tmpfile, "w"),
                )
                json.dump({"model_type": "clip"}, open(config_tmpfile, "w"))

                image_processor = CustomImageProcessor.from_pretrained(tmpdirname)

            # Now that the config is registered, it can be used as any other config with the auto-API
            with tempfile.TemporaryDirectory() as tmp_dir:
                image_processor.save_pretrained(tmp_dir)
                new_image_processor = AutoImageProcessor.from_pretrained(tmp_dir)
                self.assertIsInstance(new_image_processor, CustomImageProcessor)

        finally:
            if "custom" in CONFIG_MAPPING._extra_content:
                del CONFIG_MAPPING._extra_content["custom"]
            if CustomConfig in IMAGE_PROCESSOR_MAPPING._extra_content:
                del IMAGE_PROCESSOR_MAPPING._extra_content[CustomConfig]
from transformers import CLIPImageProcessor


class CustomImageProcessor(CLIPImageProcessor):
    pass
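The three lines above are the new test_module custom_image_processing helper that the registration test imports (the path follows from the sys.path.append(... / "utils") line in the test file). Pulling the pieces together, the registration flow that test_new_image_processor_registration exercises amounts to this sketch:

from transformers import AutoConfig, AutoImageProcessor

from test_module.custom_configuration import CustomConfig
from test_module.custom_image_processing import CustomImageProcessor

# Tie the custom model type to its config class, then the config class to its image processor.
AutoConfig.register("custom", CustomConfig)
AutoImageProcessor.register(CustomConfig, CustomImageProcessor)

# From here on, AutoImageProcessor.from_pretrained(...) can resolve and return
# CustomImageProcessor for checkpoints saved with it, which is what the test asserts.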