Unverified Commit b5c6fdec authored by Sylvain Gugger, committed by GitHub

PoC for a ProcessorMixin class (#15549)



* PoC for a ProcessorMixin class

* Documentation

* Apply suggestions from code review
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>
Co-authored-by: Suraj Patil <surajp815@gmail.com>
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>

* Roll out to other processors

* Add base feature extractor class in init

* Use args and kwargs
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>
Co-authored-by: Suraj Patil <surajp815@gmail.com>
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
parent ba3f9a71
@@ -12,10 +12,22 @@ specific language governing permissions and limitations under the License.

# Processors

This library includes processors for several traditional tasks. These processors can be used to process a dataset into
examples that can be fed to a model.

Processors can mean two different things in the Transformers library:

- the objects that pre-process inputs for multi-modal models such as [Wav2Vec2](../model_doc/wav2vec2) (speech and text)
  or [CLIP](../model_doc/clip) (text and vision)
- deprecated objects that were used in older versions of the library to preprocess data for GLUE or SQUAD.

## Multi-modal processors

Any multi-modal model will require an object to encode or decode the data that groups several modalities (among text,
vision and audio). This is handled by objects called processors, which group tokenizers (for the text modality) and
feature extractors (for vision and audio).

Those processors inherit from the following base class that implements the saving and loading functionality:

[[autodoc]] ProcessorMixin

## Deprecated processors

All processors follow the same architecture which is that of the
[`~data.processors.utils.DataProcessor`]. The processor returns a list of
...
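A minimal usage sketch of what the new doc section describes, using the `openai/clip-vit-base-patch32` checkpoint named later in this diff:

```python
from transformers import CLIPProcessor

# The processor groups a tokenizer (text) and a feature extractor (vision) behind one object.
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Saving writes the tokenizer files and preprocessor_config.json side by side,
# so the pair can be reloaded with a single from_pretrained call.
processor.save_pretrained("./my-clip-processor")
reloaded = CLIPProcessor.from_pretrained("./my-clip-processor")
```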
@@ -95,7 +95,7 @@ _import_structure = {
    "dependency_versions_table": [],
    "dynamic_module_utils": [],
    "feature_extraction_sequence_utils": ["SequenceFeatureExtractor"],
    "feature_extraction_utils": ["BatchFeature"],
    "feature_extraction_utils": ["BatchFeature", "FeatureExtractionMixin"],
    "file_utils": [
        "CONFIG_NAME",
        "MODEL_CARD_NAME",
@@ -365,6 +365,7 @@ _import_structure = {
        "ZeroShotClassificationPipeline",
        "pipeline",
    ],
    "processing_utils": ["ProcessorMixin"],
    "testing_utils": [],
    "tokenization_utils": ["PreTrainedTokenizer"],
    "tokenization_utils_base": [
@@ -2307,7 +2308,7 @@ if TYPE_CHECKING:
    from .feature_extraction_sequence_utils import SequenceFeatureExtractor

    # Feature Extractor
    from .feature_extraction_utils import BatchFeature
    from .feature_extraction_utils import BatchFeature, FeatureExtractionMixin

    # Files and general utilities
    from .file_utils import (
@@ -2555,6 +2556,7 @@ if TYPE_CHECKING:
        ZeroShotClassificationPipeline,
        pipeline,
    )
    from .processing_utils import ProcessorMixin

    # Tokenization
    from .tokenization_utils import PreTrainedTokenizer
...
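With the two `_import_structure` entries and the matching `TYPE_CHECKING` imports above, both classes are re-exported from the package root. A quick sanity check, assuming a build of this branch is installed:

```python
# FeatureExtractionMixin and ProcessorMixin become public, top-level symbols.
from transformers import BatchFeature, FeatureExtractionMixin, ProcessorMixin
```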
@@ -15,12 +15,11 @@
"""
Image/Text processor class for CLIP
"""
from ...processing_utils import ProcessorMixin
from ...tokenization_utils_base import BatchEncoding
from .feature_extraction_clip import CLIPFeatureExtractor
from .tokenization_clip import CLIPTokenizer


class CLIPProcessor:
class CLIPProcessor(ProcessorMixin):
    r"""
    Constructs a CLIP processor which wraps a CLIP feature extractor and a CLIP tokenizer into a single processor.
@@ -33,77 +32,13 @@ class CLIPProcessor:
        tokenizer ([`CLIPTokenizer`]):
            The tokenizer is a required input.
    """
    feature_extractor_class = "CLIPFeatureExtractor"
    tokenizer_class = "CLIPTokenizer"

    def __init__(self, feature_extractor, tokenizer):
        if not isinstance(feature_extractor, CLIPFeatureExtractor):
        super().__init__(feature_extractor, tokenizer)
raise ValueError(
f"`feature_extractor` has to be of type CLIPFeatureExtractor, but is {type(feature_extractor)}"
)
if not isinstance(tokenizer, CLIPTokenizer):
raise ValueError(f"`tokenizer` has to be of type CLIPTokenizer, but is {type(tokenizer)}")
self.feature_extractor = feature_extractor
self.tokenizer = tokenizer
        self.current_processor = self.feature_extractor
def save_pretrained(self, save_directory):
"""
Save a CLIP feature extractor object and CLIP tokenizer object to the directory `save_directory`, so that it
can be re-loaded using the [`~CLIPProcessor.from_pretrained`] class method.
<Tip>
This class method is simply calling [`~PreTrainedFeatureExtractor.save_pretrained`] and
[`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
above for more information.
</Tip>
Args:
save_directory (`str` or `os.PathLike`):
Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
be created if it does not exist).
"""
self.feature_extractor._set_processor_class(self.__class__.__name__)
self.feature_extractor.save_pretrained(save_directory)
self.tokenizer._set_processor_class(self.__class__.__name__)
self.tokenizer.save_pretrained(save_directory)
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
r"""
Instantiate a [`CLIPProcessor`] from a pretrained CLIP processor.
<Tip>
This class method is simply calling CLIPFeatureExtractor's [`~PreTrainedFeatureExtractor.from_pretrained`] and
CLIPTokenizer's [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the
docstrings of the methods above for more information.
</Tip>
Args:
pretrained_model_name_or_path (`str` or `os.PathLike`):
This can be either:
- a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
huggingface.co. Valid model ids can be located at the root-level, like `clip-vit-base-patch32`, or
namespaced under a user or organization name, like `openai/clip-vit-base-patch32`.
- a path to a *directory* containing a feature extractor file saved using the
[`~PreTrainedFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
- a path or url to a saved feature extractor JSON *file*, e.g.,
`./my_model_directory/preprocessor_config.json`.
**kwargs
Additional keyword arguments passed along to both [`PreTrainedFeatureExtractor`] and
[`PreTrainedTokenizer`]
"""
feature_extractor = CLIPFeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
    def __call__(self, text=None, images=None, return_tensors=None, **kwargs):
        """
        Main method to prepare for the model one or several sequences(s) and image(s). This method forwards the `text`
...
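Taken together, the CLIP hunks above reduce the processor to a thin declaration plus a call to the mixin. A sketch of the resulting class shape, condensed from the diff rather than quoted from the file:

```python
from transformers.processing_utils import ProcessorMixin


class CLIPProcessor(ProcessorMixin):
    # The mixin resolves these names to classes on the top-level transformers module.
    feature_extractor_class = "CLIPFeatureExtractor"
    tokenizer_class = "CLIPTokenizer"

    def __init__(self, feature_extractor, tokenizer):
        # Type checks, attribute assignment, save_pretrained and from_pretrained
        # are all inherited from ProcessorMixin now.
        super().__init__(feature_extractor, tokenizer)
        self.current_processor = self.feature_extractor
```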
@@ -18,13 +18,11 @@ Processor class for LayoutLMv2.
from typing import List, Optional, Union

from ...file_utils import TensorType
from ...processing_utils import ProcessorMixin
from ...tokenization_utils_base import BatchEncoding, PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
from .feature_extraction_layoutlmv2 import LayoutLMv2FeatureExtractor
from .tokenization_layoutlmv2 import LayoutLMv2Tokenizer
from .tokenization_layoutlmv2_fast import LayoutLMv2TokenizerFast


class LayoutLMv2Processor:
class LayoutLMv2Processor(ProcessorMixin):
    r"""
    Constructs a LayoutLMv2 processor which combines a LayoutLMv2 feature extractor and a LayoutLMv2 tokenizer into a
    single processor.
@@ -43,84 +41,8 @@ class LayoutLMv2Processor:
        tokenizer (`LayoutLMv2Tokenizer` or `LayoutLMv2TokenizerFast`):
            An instance of [`LayoutLMv2Tokenizer`] or [`LayoutLMv2TokenizerFast`]. The tokenizer is a required input.
    """
    feature_extractor_class = "LayoutLMv2FeatureExtractor"
    tokenizer_class = ("LayoutLMv2Tokenizer", "LayoutLMv2TokenizerFast")

    def __init__(self, feature_extractor, tokenizer):
if not isinstance(feature_extractor, LayoutLMv2FeatureExtractor):
raise ValueError(
f"`feature_extractor` has to be of type {LayoutLMv2FeatureExtractor.__class__}, but is {type(feature_extractor)}"
)
if not isinstance(tokenizer, (LayoutLMv2Tokenizer, LayoutLMv2TokenizerFast)):
raise ValueError(
f"`tokenizer` has to be of type {LayoutLMv2Tokenizer.__class__} or {LayoutLMv2TokenizerFast.__class__}, but is {type(tokenizer)}"
)
self.feature_extractor = feature_extractor
self.tokenizer = tokenizer
def save_pretrained(self, save_directory):
"""
Save a LayoutLMv2 feature_extractor object and LayoutLMv2 tokenizer object to the directory `save_directory`,
so that it can be re-loaded using the [`~LayoutLMv2Processor.from_pretrained`] class method.
<Tip>
This class method is simply calling [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] and
[`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
above for more information.
</Tip>
Args:
save_directory (`str` or `os.PathLike`):
Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
be created if it does not exist).
"""
self.feature_extractor._set_processor_class(self.__class__.__name__)
self.feature_extractor.save_pretrained(save_directory)
self.tokenizer._set_processor_class(self.__class__.__name__)
self.tokenizer.save_pretrained(save_directory)
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, use_fast=True, **kwargs):
r"""
Instantiate a [`LayoutLMv2Processor`] from a pretrained LayoutLMv2 processor.
<Tip>
This class method is simply calling LayoutLMv2FeatureExtractor's
[`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] and LayoutLMv2TokenizerFast's
[`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the docstrings of the methods
above for more information.
</Tip>
Args:
pretrained_model_name_or_path (`str` or `os.PathLike`):
This can be either:
- a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
- a path to a *directory* containing a feature extractor file saved using the
[`~SequenceFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
- a path or url to a saved feature extractor JSON *file*, e.g.,
`./my_model_directory/preprocessor_config.json`.
use_fast (`bool`, *optional*, defaults to `True`):
Whether or not to instantiate a fast tokenizer.
**kwargs
Additional keyword arguments passed along to both [`SequenceFeatureExtractor`] and
[`PreTrainedTokenizer`]
"""
feature_extractor = LayoutLMv2FeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
if use_fast:
tokenizer = LayoutLMv2TokenizerFast.from_pretrained(pretrained_model_name_or_path, **kwargs)
else:
tokenizer = LayoutLMv2Tokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
    def __call__(
        self,
...
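Because `tokenizer_class` is now a tuple of (slow, fast) names, the mixin's `from_pretrained` picks the fast tokenizer by default and falls back to the slow one when `use_fast=False` is passed. A hedged usage sketch; the checkpoint name is illustrative and not taken from the diff:

```python
from transformers import LayoutLMv2Processor

# Loads LayoutLMv2FeatureExtractor + LayoutLMv2TokenizerFast by default.
processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased")

# use_fast=False selects the first entry of the tokenizer_class tuple instead.
slow_processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", use_fast=False)
```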
@@ -17,15 +17,12 @@ Processor class for LayoutXLM.
"""
from typing import List, Optional, Union

from transformers.models.layoutlmv2.feature_extraction_layoutlmv2 import LayoutLMv2FeatureExtractor

from ...file_utils import TensorType
from ...processing_utils import ProcessorMixin
from ...tokenization_utils_base import BatchEncoding, PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
from .tokenization_layoutxlm import LayoutXLMTokenizer
from .tokenization_layoutxlm_fast import LayoutXLMTokenizerFast


class LayoutXLMProcessor:
class LayoutXLMProcessor(ProcessorMixin):
    r"""
    Constructs a LayoutXLM processor which combines a LayoutXLM feature extractor and a LayoutXLM tokenizer into a
    single processor.
@@ -44,84 +41,8 @@ class LayoutXLMProcessor:
        tokenizer (`LayoutXLMTokenizer` or `LayoutXLMTokenizerFast`):
            An instance of [`LayoutXLMTokenizer`] or [`LayoutXLMTokenizerFast`]. The tokenizer is a required input.
    """
    feature_extractor_class = "LayoutLMv2FeatureExtractor"
    tokenizer_class = ("LayoutXLMTokenizer", "LayoutXLMTokenizerFast")

    def __init__(self, feature_extractor, tokenizer):
if not isinstance(feature_extractor, LayoutLMv2FeatureExtractor):
raise ValueError(
f"`feature_extractor` has to be of type {LayoutLMv2FeatureExtractor.__class__}, but is {type(feature_extractor)}"
)
if not isinstance(tokenizer, (LayoutXLMTokenizer, LayoutXLMTokenizerFast)):
raise ValueError(
f"`tokenizer` has to be of type {LayoutXLMTokenizer.__class__} or {LayoutXLMTokenizerFast.__class__}, but is {type(tokenizer)}"
)
self.feature_extractor = feature_extractor
self.tokenizer = tokenizer
def save_pretrained(self, save_directory):
"""
Save a LayoutXLM feature_extractor object and LayoutXLM tokenizer object to the directory `save_directory`, so
that it can be re-loaded using the [`~LayoutXLMProcessor.from_pretrained`] class method.
<Tip>
This class method is simply calling [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] and
[`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
above for more information.
</Tip>
Args:
save_directory (`str` or `os.PathLike`):
Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
be created if it does not exist).
"""
self.feature_extractor._set_processor_class(self.__class__.__name__)
self.feature_extractor.save_pretrained(save_directory)
self.tokenizer._set_processor_class(self.__class__.__name__)
self.tokenizer.save_pretrained(save_directory)
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, use_fast=True, **kwargs):
r"""
Instantiate a [`LayoutXLMProcessor`] from a pretrained LayoutXLM processor.
<Tip>
This class method is simply calling Layoutv2FeatureExtractor's
[`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] and LayoutXLMTokenizerFast's
[`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the docstrings of the methods
above for more information.
</Tip>
Args:
pretrained_model_name_or_path (`str` or `os.PathLike`):
This can be either:
- a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
- a path to a *directory* containing a feature extractor file saved using the
[`~SequenceFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
- a path or url to a saved feature extractor JSON *file*, e.g.,
`./my_model_directory/preprocessor_config.json`.
use_fast (`bool`, *optional*, defaults to `True`):
Whether or not to instantiate a fast tokenizer.
**kwargs
Additional keyword arguments passed along to both [`SequenceFeatureExtractor`] and
[`PreTrainedTokenizer`]
"""
feature_extractor = LayoutLMv2FeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
if use_fast:
tokenizer = LayoutXLMTokenizerFast.from_pretrained(pretrained_model_name_or_path, **kwargs)
else:
tokenizer = LayoutXLMTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
    def __call__(
        self,
...
@@ -17,11 +17,10 @@ Speech processor class for Speech2Text
"""
from contextlib import contextmanager

from .feature_extraction_speech_to_text import Speech2TextFeatureExtractor
from ...processing_utils import ProcessorMixin
from .tokenization_speech_to_text import Speech2TextTokenizer


class Speech2TextProcessor:
class Speech2TextProcessor(ProcessorMixin):
    r"""
    Constructs a Speech2Text processor which wraps a Speech2Text feature extractor and a Speech2Text tokenizer into a
    single processor.
@@ -36,79 +35,13 @@ class Speech2TextProcessor:
        tokenizer (`Speech2TextTokenizer`):
            An instance of [`Speech2TextTokenizer`]. The tokenizer is a required input.
    """
    feature_extractor_class = "Speech2TextFeatureExtractor"
    tokenizer_class = "Speech2TextTokenizer"

    def __init__(self, feature_extractor, tokenizer):
        if not isinstance(feature_extractor, Speech2TextFeatureExtractor):
        super().__init__(feature_extractor, tokenizer)
raise ValueError(
f"`feature_extractor` has to be of type {Speech2TextFeatureExtractor.__class__}, but is {type(feature_extractor)}"
)
if not isinstance(tokenizer, Speech2TextTokenizer):
raise ValueError(
f"`tokenizer` has to be of type {Speech2TextTokenizer.__class__}, but is {type(tokenizer)}"
)
self.feature_extractor = feature_extractor
self.tokenizer = tokenizer
        self.current_processor = self.feature_extractor
def save_pretrained(self, save_directory):
"""
Save a Speech2Text feature extractor object and Speech2Text tokenizer object to the directory `save_directory`,
so that it can be re-loaded using the [`~Speech2TextProcessor.from_pretrained`] class method.
<Tip>
This class method is simply calling [`~PreTrainedFeatureExtractor.save_pretrained`] and
[`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
above for more information.
</Tip>
Args:
save_directory (`str` or `os.PathLike`):
Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
be created if it does not exist).
"""
self.feature_extractor._set_processor_class(self.__class__.__name__)
self.feature_extractor.save_pretrained(save_directory)
self.tokenizer._set_processor_class(self.__class__.__name__)
self.tokenizer.save_pretrained(save_directory)
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
r"""
Instantiate a [`Speech2TextProcessor`] from a pretrained Speech2Text processor.
<Tip>
This class method is simply calling Speech2TextFeatureExtractor's
[`~PreTrainedFeatureExtractor.from_pretrained`] and Speech2TextTokenizer's
[`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the docstrings of the methods
above for more information.
</Tip>
Args:
pretrained_model_name_or_path (`str` or `os.PathLike`):
This can be either:
- a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
- a path to a *directory* containing a feature extractor file saved using the
[`~PreTrainedFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
- a path or url to a saved feature extractor JSON *file*, e.g.,
`./my_model_directory/preprocessor_config.json`.
**kwargs
Additional keyword arguments passed along to both [`PreTrainedFeatureExtractor`] and
[`PreTrainedTokenizer`]
"""
feature_extractor = Speech2TextFeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
tokenizer = Speech2TextTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
    def __call__(self, *args, **kwargs):
        """
        When used in normal mode, this method forwards all its arguments to Speech2TextFeatureExtractor's
...
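The subclasses that keep `self.current_processor` do so for the target-text context manager, which is unchanged by this PR and therefore not visible in the hunk. Roughly, it swaps the object that `__call__` forwards to; the checkpoint and dummy audio below are illustrative, not taken from the diff:

```python
import numpy as np
from transformers import Speech2TextProcessor

processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr")
speech = np.zeros(16_000, dtype=np.float32)  # one second of silence

inputs = processor(speech, sampling_rate=16_000, return_tensors="pt")            # routed to the feature extractor
with processor.as_target_processor():
    labels = processor("a transcription", return_tensors="pt").input_ids         # routed to the tokenizer
```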
@@ -17,12 +17,10 @@ Speech processor class for Speech2Text2
"""
from contextlib import contextmanager

from ...feature_extraction_sequence_utils import SequenceFeatureExtractor
from ...processing_utils import ProcessorMixin
from ..auto.feature_extraction_auto import AutoFeatureExtractor
from .tokenization_speech_to_text_2 import Speech2Text2Tokenizer


class Speech2Text2Processor:
class Speech2Text2Processor(ProcessorMixin):
    r"""
    Constructs a Speech2Text2 processor which wraps a Speech2Text2 feature extractor and a Speech2Text2 tokenizer into
    a single processor.
@@ -36,77 +34,13 @@ class Speech2Text2Processor:
        tokenizer (`Speech2Text2Tokenizer`):
            An instance of [`Speech2Text2Tokenizer`]. The tokenizer is a required input.
    """
    feature_extractor_class = "AutoFeatureExtractor"
    tokenizer_class = "Speech2Text2Tokenizer"

    def __init__(self, feature_extractor, tokenizer):
        if not isinstance(feature_extractor, SequenceFeatureExtractor):
        super().__init__(feature_extractor, tokenizer)
raise ValueError(
f"`feature_extractor` has to be of type {SequenceFeatureExtractor.__class__}, but is {type(feature_extractor)}"
)
if not isinstance(tokenizer, Speech2Text2Tokenizer):
raise ValueError(
f"`tokenizer` has to be of type {Speech2Text2Tokenizer.__class__}, but is {type(tokenizer)}"
)
self.feature_extractor = feature_extractor
self.tokenizer = tokenizer
        self.current_processor = self.feature_extractor
def save_pretrained(self, save_directory):
"""
Save a Speech2Text2 feature extractor object and Speech2Text2 tokenizer object to the directory
`save_directory`, so that it can be re-loaded using the [`~Speech2Text2Processor.from_pretrained`] class
method.
<Tip>
This class method is simply calling [`~PreTrainedFeatureExtractor.save_pretrained`] and
[`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
above for more information.
</Tip>
Args:
save_directory (`str` or `os.PathLike`):
Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
be created if it does not exist).
"""
self.feature_extractor.save_pretrained(save_directory)
self.tokenizer.save_pretrained(save_directory)
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
r"""
Instantiate a [`Speech2Text2Processor`] from a pretrained Speech2Text2 processor.
<Tip>
This class method is simply calling AutoFeatureExtractor's [`~PreTrainedFeatureExtractor.from_pretrained`] and
Speech2Text2Tokenizer's [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the
docstrings of the methods above for more information.
</Tip>
Args:
pretrained_model_name_or_path (`str` or `os.PathLike`):
This can be either:
- a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
- a path to a *directory* containing a feature extractor file saved using the
[`~PreTrainedFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
- a path or url to a saved feature extractor JSON *file*, e.g.,
`./my_model_directory/preprocessor_config.json`.
**kwargs
Additional keyword arguments passed along to both [`PreTrainedFeatureExtractor`] and
[`PreTrainedTokenizer`]
"""
feature_extractor = AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
tokenizer = Speech2Text2Tokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
    def __call__(self, *args, **kwargs):
        """
        When used in normal mode, this method forwards all its arguments to AutoFeatureExtractor's
...
@@ -17,15 +17,10 @@ Processor class for TrOCR.
"""
from contextlib import contextmanager

from transformers import AutoFeatureExtractor, AutoTokenizer
from ...processing_utils import ProcessorMixin
from transformers.feature_extraction_utils import FeatureExtractionMixin
from transformers.models.roberta.tokenization_roberta import RobertaTokenizer
from transformers.models.roberta.tokenization_roberta_fast import RobertaTokenizerFast
from transformers.models.xlm_roberta.tokenization_xlm_roberta import XLMRobertaTokenizer
from transformers.models.xlm_roberta.tokenization_xlm_roberta_fast import XLMRobertaTokenizerFast


class TrOCRProcessor:
class TrOCRProcessor(ProcessorMixin):
    r"""
    Constructs a TrOCR processor which wraps a vision feature extractor and a TrOCR tokenizer into a single processor.
@@ -39,78 +34,13 @@ class TrOCRProcessor:
        tokenizer ([`RobertaTokenizer`/`XLMRobertaTokenizer`]):
            An instance of [`RobertaTokenizer`/`XLMRobertaTokenizer`]. The tokenizer is a required input.
    """
    feature_extractor_class = "AutoFeatureExtractor"
    tokenizer_class = "AutoTokenizer"

    def __init__(self, feature_extractor, tokenizer):
        if not isinstance(feature_extractor, FeatureExtractionMixin):
        super().__init__(feature_extractor, tokenizer)
raise ValueError(
f"`feature_extractor` has to be of type {FeatureExtractionMixin.__class__}, but is {type(feature_extractor)}"
)
if not isinstance(
tokenizer, (RobertaTokenizer, RobertaTokenizerFast, XLMRobertaTokenizer, XLMRobertaTokenizerFast)
):
raise ValueError(
f"`tokenizer` has to be of type {RobertaTokenizer.__class__} or {RobertaTokenizerFast.__class__} or {XLMRobertaTokenizer.__class__} or {XLMRobertaTokenizerFast.__class__}, but is {type(tokenizer)}"
)
self.feature_extractor = feature_extractor
self.tokenizer = tokenizer
        self.current_processor = self.feature_extractor
def save_pretrained(self, save_directory):
"""
Save a TrOCR feature extractor object and TrOCR tokenizer object to the directory `save_directory`, so that it
can be re-loaded using the [`~TrOCRProcessor.from_pretrained`] class method.
<Tip>
This class method is simply calling [`~PreTrainedFeatureExtractor.save_pretrained`] and
[`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
above for more information.
</Tip>
Args:
save_directory (`str` or `os.PathLike`):
Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
be created if it does not exist).
"""
self.feature_extractor.save_pretrained(save_directory)
self.tokenizer.save_pretrained(save_directory)
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
r"""
Instantiate a [`TrOCRProcessor`] from a pretrained TrOCR processor.
<Tip>
This class method is simply calling AutoFeatureExtractor's [`~PreTrainedFeatureExtractor.from_pretrained`] and
TrOCRTokenizer's [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the
docstrings of the methods above for more information.
</Tip>
Args:
pretrained_model_name_or_path (`str` or `os.PathLike`):
This can be either:
- a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
- a path to a *directory* containing a feature extractor file saved using the
[`~PreTrainedFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
- a path or url to a saved feature extractor JSON *file*, e.g.,
`./my_model_directory/preprocessor_config.json`.
**kwargs
Additional keyword arguments passed along to both [`PreTrainedFeatureExtractor`] and
[`PreTrainedTokenizer`]
"""
feature_extractor = AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
    def __call__(self, *args, **kwargs):
        """
        When used in normal mode, this method forwards all its arguments to AutoFeatureExtractor's
...
@@ -18,14 +18,12 @@ Processor class for ViLT.
from typing import List, Optional, Union

from transformers import BertTokenizerFast

from ...file_utils import TensorType
from ...processing_utils import ProcessorMixin
from ...tokenization_utils_base import BatchEncoding, PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
from .feature_extraction_vilt import ViltFeatureExtractor


class ViltProcessor:
class ViltProcessor(ProcessorMixin):
    r"""
    Constructs a ViLT processor which wraps a BERT tokenizer and ViLT feature extractor into a single processor.
@@ -38,75 +36,13 @@ class ViltProcessor:
        tokenizer (`BertTokenizerFast`):
            An instance of [`BertTokenizerFast`]. The tokenizer is a required input.
    """
    feature_extractor_class = "ViltFeatureExtractor"
    tokenizer_class = ("BertTokenizer", "BertTokenizerFast")

    def __init__(self, feature_extractor, tokenizer):
        if not isinstance(feature_extractor, ViltFeatureExtractor):
        super().__init__(feature_extractor, tokenizer)
raise ValueError(
f"`feature_extractor` has to be of type {ViltFeatureExtractor.__class__}, but is {type(feature_extractor)}"
)
if not isinstance(tokenizer, BertTokenizerFast):
raise ValueError(f"`tokenizer` has to be of type {BertTokenizerFast.__class__}, but is {type(tokenizer)}")
self.feature_extractor = feature_extractor
self.tokenizer = tokenizer
        self.current_processor = self.feature_extractor
def save_pretrained(self, save_directory):
"""
Save a ViLT feature_extractor object and BERT tokenizer object to the directory `save_directory`, so that it
can be re-loaded using the [`~ViltProcessor.from_pretrained`] class method.
<Tip>
This class method is simply calling [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] and
[`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
above for more information.
</Tip>
Args:
save_directory (`str` or `os.PathLike`):
Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
be created if it does not exist).
"""
self.feature_extractor.save_pretrained(save_directory)
self.tokenizer.save_pretrained(save_directory)
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
r"""
Instantiate a [`ViltProcessor`] from a pretrained ViLT processor.
<Tip>
This class method is simply calling ViltFeatureExtractor's
[`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] and BertTokenizerFast's
[`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the docstrings of the methods
above for more information.
</Tip>
Args:
pretrained_model_name_or_path (`str` or `os.PathLike`):
This can be either:
- a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
- a path to a *directory* containing a feature extractor file saved using the
[`~SequenceFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
- a path or url to a saved feature extractor JSON *file*, e.g.,
`./my_model_directory/preprocessor_config.json`.
**kwargs
Additional keyword arguments passed along to both [`SequenceFeatureExtractor`] and
[`PreTrainedTokenizer`]
"""
feature_extractor = ViltFeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
tokenizer = BertTokenizerFast.from_pretrained(pretrained_model_name_or_path, **kwargs)
return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
    def __call__(
        self,
        images,
...
@@ -15,17 +15,12 @@
"""
Processor class for VisionTextDualEncoder
"""
from typing import Union

from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
from transformers.feature_extraction_utils import FeatureExtractionMixin

from ...processing_utils import ProcessorMixin
from ...tokenization_utils_base import BatchEncoding
from ..auto.feature_extraction_auto import AutoFeatureExtractor
from ..auto.tokenization_auto import AutoTokenizer


class VisionTextDualEncoderProcessor:
class VisionTextDualEncoderProcessor(ProcessorMixin):
    r"""
    Constructs a VisionTextDualEncoder processor which wraps a vision feature extractor and a tokenizer into a single
    processor.
@@ -40,82 +35,13 @@ class VisionTextDualEncoderProcessor:
        tokenizer ([`PreTrainedTokenizer`]):
            The tokenizer is a required input.
    """
    feature_extractor_class = "AutoFeatureExtractor"
    tokenizer_class = "AutoTokenizer"

    def __init__(
        self, feature_extractor: FeatureExtractionMixin, tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast]
    def __init__(self, feature_extractor, tokenizer):
        super().__init__(feature_extractor, tokenizer)
):
if not isinstance(feature_extractor, FeatureExtractionMixin):
raise ValueError(
f"`feature_extractor` has to be of type {FeatureExtractionMixin.__class__}, but is {type(feature_extractor)}"
)
if not isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
raise ValueError(
f"`tokenizer` has to be of type `PreTrainedTokenizer` or `PreTrainedTokenizerFast`, but is {type(tokenizer)}"
)
self.feature_extractor = feature_extractor
self.tokenizer = tokenizer
        self.current_processor = self.feature_extractor
def save_pretrained(self, save_directory):
"""
Save a VisionTextDualEncoder feature extractor object and VisionTextDualEncoder tokenizer object to the
directory `save_directory`, so that it can be re-loaded using the
[`~VisionTextDualEncoderProcessor.from_pretrained`] class method.
<Tip>
This class method is simply calling [`~PreTrainedFeatureExtractor.save_pretrained`] and
[`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
above for more information.
</Tip>
Args:
save_directory (`str` or `os.PathLike`):
Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
be created if it does not exist).
"""
self.feature_extractor._set_processor_class(self.__class__.__name__)
self.feature_extractor.save_pretrained(save_directory)
self.tokenizer._set_processor_class(self.__class__.__name__)
self.tokenizer.save_pretrained(save_directory)
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
r"""
Instantiate a [`VisionTextDualEncoderProcessor`] from a pretrained VisionTextDualEncoder processor.
<Tip>
This class method is simply calling AutoFeatureExtractor's [`~PreTrainedFeatureExtractor.from_pretrained`] and
AutoTokenizer's [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the
docstrings of the methods above for more information.
</Tip>
Args:
pretrained_model_name_or_path (`str` or `os.PathLike`):
This can be either:
- a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
- a path to a *directory* containing a feature extractor file saved using the
[`~PreTrainedFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
- a path or url to a saved feature extractor JSON *file*, e.g.,
`./my_model_directory/preprocessor_config.json`.
**kwargs
Additional keyword arguments passed along to both [`PreTrainedFeatureExtractor`] and
[`PreTrainedTokenizer`]
"""
feature_extractor = AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
    def __call__(self, text=None, images=None, return_tensors=None, **kwargs):
        """
        Main method to prepare for the model one or several sequences(s) and image(s). This method forwards the `text`
...
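Since the attribute classes here are the Auto* names, this processor can pair any vision feature extractor with any tokenizer. A hedged sketch; both checkpoint names are illustrative and not taken from the diff:

```python
from transformers import AutoFeatureExtractor, AutoTokenizer, VisionTextDualEncoderProcessor

# Any vision feature extractor plus any tokenizer can back this processor.
feature_extractor = AutoFeatureExtractor.from_pretrained("google/vit-base-patch16-224")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
processor = VisionTextDualEncoderProcessor(feature_extractor, tokenizer)
processor.save_pretrained("./vision-text-processor")
```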
@@ -18,14 +18,12 @@ Speech processor class for Wav2Vec2
import warnings
from contextlib import contextmanager

from ...tokenization_utils import PreTrainedTokenizer
from ...processing_utils import ProcessorMixin
from ...tokenization_utils_fast import PreTrainedTokenizerFast
from ..auto.tokenization_auto import AutoTokenizer
from .feature_extraction_wav2vec2 import Wav2Vec2FeatureExtractor
from .tokenization_wav2vec2 import Wav2Vec2CTCTokenizer


class Wav2Vec2Processor:
class Wav2Vec2Processor(ProcessorMixin):
    r"""
    Constructs a Wav2Vec2 processor which wraps a Wav2Vec2 feature extractor and a Wav2Vec2 CTC tokenizer into a single
    processor.
@@ -39,82 +37,17 @@ class Wav2Vec2Processor:
        tokenizer ([`PreTrainedTokenizer`]):
            An instance of [`PreTrainedTokenizer`]. The tokenizer is a required input.
    """
    feature_extractor_class = "Wav2Vec2FeatureExtractor"
    tokenizer_class = "AutoTokenizer"

    def __init__(self, feature_extractor, tokenizer):
        if not isinstance(feature_extractor, Wav2Vec2FeatureExtractor):
        super().__init__(feature_extractor, tokenizer)
raise ValueError(
f"`feature_extractor` has to be of type {Wav2Vec2FeatureExtractor.__class__}, but is {type(feature_extractor)}"
)
if not isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
raise ValueError(
f"`tokenizer` has to be of type {PreTrainedTokenizer.__class__}, but is {type(tokenizer)}"
)
self.feature_extractor = feature_extractor
self.tokenizer = tokenizer
        self.current_processor = self.feature_extractor
def save_pretrained(self, save_directory):
"""
Save a Wav2Vec2 feature_extractor object and Wav2Vec2 tokenizer object to the directory `save_directory`, so
that it can be re-loaded using the [`~Wav2Vec2Processor.from_pretrained`] class method.
<Tip>
This class method is simply calling [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] and
[`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
above for more information.
</Tip>
Args:
save_directory (`str` or `os.PathLike`):
Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
be created if it does not exist).
"""
self.feature_extractor._set_processor_class(self.__class__.__name__)
self.feature_extractor.save_pretrained(save_directory)
self.tokenizer._set_processor_class(self.__class__.__name__)
self.tokenizer.save_pretrained(save_directory)
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
r"""
Instantiate a [`Wav2Vec2Processor`] from a pretrained Wav2Vec2 processor.
<Tip>
This class method is simply calling Wav2Vec2FeatureExtractor's
[`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] and PreTrainedTokenizer's
[`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the docstrings of the methods
above for more information.
</Tip>
Args:
pretrained_model_name_or_path (`str` or `os.PathLike`):
This can be either:
- a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
- a path to a *directory* containing a feature extractor file saved using the
[`~SequenceFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
- a path or url to a saved feature extractor JSON *file*, e.g.,
`./my_model_directory/preprocessor_config.json`.
**kwargs
Additional keyword arguments passed along to both [`SequenceFeatureExtractor`] and
[`PreTrainedTokenizer`]
"""
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
# load generic `AutoTokenizer`
# need fallback here for backward compatibility in case processor is
# loaded from just a tokenizer file that does not have a `tokenizer_class` attribute
# behavior should be deprecated in major future release
        try:
            tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
            return super().from_pretrained(pretrained_model_name_or_path, **kwargs)
        except OSError:
            warnings.warn(
                f"Loading a tokenizer inside {cls.__name__} from a config that does not"
@@ -124,9 +57,11 @@ class Wav2Vec2Processor:
                "file to suppress this warning: ",
                FutureWarning,
            )
            feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
            tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
            return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)

    def __call__(self, *args, **kwargs):
        """
...
@@ -23,16 +23,16 @@ from typing import TYPE_CHECKING, Iterable, List, Optional, Union
import numpy as np

from ...feature_extraction_utils import FeatureExtractionMixin
from ...file_utils import ModelOutput, requires_backends
from ...tokenization_utils import PreTrainedTokenizer
from ...processing_utils import ProcessorMixin
from ..wav2vec2.feature_extraction_wav2vec2 import Wav2Vec2FeatureExtractor
from ..wav2vec2.tokenization_wav2vec2 import Wav2Vec2CTCTokenizer


if TYPE_CHECKING:
    from pyctcdecode import BeamSearchDecoderCTC

    from ...feature_extraction_utils import FeatureExtractionMixin
    from ...tokenization_utils import PreTrainedTokenizerBase


@dataclass
class Wav2Vec2DecoderWithLMOutput(ModelOutput):
@@ -47,7 +47,7 @@ class Wav2Vec2DecoderWithLMOutput(ModelOutput):
    text: Union[List[str], str]


class Wav2Vec2ProcessorWithLM:
class Wav2Vec2ProcessorWithLM(ProcessorMixin):
    r"""
    Constructs a Wav2Vec2 processor which wraps a Wav2Vec2 feature extractor, a Wav2Vec2 CTC tokenizer and a decoder
    with language model support into a single processor for language model boosted speech recognition decoding.
@@ -60,24 +60,18 @@ class Wav2Vec2ProcessorWithLM:
        decoder (`pyctcdecode.BeamSearchDecoderCTC`):
            An instance of [`pyctcdecode.BeamSearchDecoderCTC`]. The decoder is a required input.
    """
    feature_extractor_class = "Wav2Vec2FeatureExtractor"
    tokenizer_class = "Wav2Vec2CTCTokenizer"

    def __init__(
        self,
        feature_extractor: FeatureExtractionMixin,
        feature_extractor: "FeatureExtractionMixin",
        tokenizer: PreTrainedTokenizer,
        tokenizer: "PreTrainedTokenizerBase",
        decoder: "BeamSearchDecoderCTC",
    ):
        from pyctcdecode import BeamSearchDecoderCTC

        if not isinstance(feature_extractor, Wav2Vec2FeatureExtractor):
        super().__init__(feature_extractor, tokenizer)
raise ValueError(
f"`feature_extractor` has to be of type {Wav2Vec2FeatureExtractor.__class__}, but is {type(feature_extractor)}"
)
if not isinstance(tokenizer, Wav2Vec2CTCTokenizer):
# TODO(PVP) - this can be relaxed in the future to allow other kinds of tokenizers
raise ValueError(
f"`tokenizer` has to be of type {Wav2Vec2CTCTokenizer.__class__}, but is {type(tokenizer)}"
)
        if not isinstance(decoder, BeamSearchDecoderCTC):
            raise ValueError(f"`decoder` has to be of type {BeamSearchDecoderCTC.__class__}, but is {type(decoder)}")
@@ -90,37 +84,11 @@ class Wav2Vec2ProcessorWithLM:
                f"Make sure to include {missing_decoder_tokens} in the decoder's alphabet."
            )

        self.feature_extractor = feature_extractor
        self.tokenizer = tokenizer
        self.decoder = decoder
        self.current_processor = self.feature_extractor

    def save_pretrained(self, save_directory):
        """
        super().save_pretrained(save_directory)
Save the Wav2Vec2 feature_extractor, a tokenizer object and a pyctcdecode decoder to the directory
`save_directory`, so that they can be re-loaded using the [`~Wav2Vec2ProcessorWithLM.from_pretrained`] class
method.
<Tip>
This class method is simply calling [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained,`]
[`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`] and pyctcdecode's
[`pyctcdecode.BeamSearchDecoderCTC.save_to_dir`].
Please refer to the docstrings of the methods above for more information.
</Tip>
Args:
save_directory (`str` or `os.PathLike`):
Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
be created if it does not exist).
"""
self.feature_extractor._set_processor_class(self.__class__.__name__)
self.feature_extractor.save_pretrained(save_directory)
self.tokenizer._set_processor_class(self.__class__.__name__)
self.tokenizer.save_pretrained(save_directory)
        self.decoder.save_to_dir(save_directory)

    @classmethod
@@ -157,8 +125,7 @@ class Wav2Vec2ProcessorWithLM:
        requires_backends(cls, "pyctcdecode")
        from pyctcdecode import BeamSearchDecoderCTC

        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
        feature_extractor, tokenizer = super()._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
        tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)

        if os.path.isdir(pretrained_model_name_or_path):
            decoder = BeamSearchDecoderCTC.load_from_dir(pretrained_model_name_or_path)
...
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Processing saving/loading class for common processors.
"""
import importlib.util
from pathlib import Path
# Dynamically import the Transformers module to grab the attribute classes of the processor from their names.
spec = importlib.util.spec_from_file_location(
"transformers", Path(__file__).parent / "__init__.py", submodule_search_locations=[Path(__file__).parent]
)
transformers_module = spec.loader.load_module()
AUTO_TO_BASE_CLASS_MAPPING = {
"AutoTokenizer": "PreTrainedTokenizerBase",
"AutoFeatureExtractor": "FeatureExtractionMixin",
}
class ProcessorMixin:
"""
This is a mixin used to provide saving/loading functionality for all processor classes.
"""
attributes = ["feature_extractor", "tokenizer"]
# Names need to be attr_class for attr in attributes
feature_extractor_class = None
tokenizer_class = None
# args have to match the attributes class attribute
def __init__(self, *args, **kwargs):
# Sanitize args and kwargs
for key in kwargs:
if key not in self.attributes:
raise TypeError(f"Unexepcted keyword argument {key}.")
for arg, attribute_name in zip(args, self.attributes):
if attribute_name in kwargs:
raise TypeError(f"Got multiple values for argument {attribute_name}.")
else:
kwargs[attribute_name] = arg
if len(kwargs) != len(self.attributes):
raise ValueError(
f"This processor requires {len(self.attributes)} arguments: {', '.join(self.attributes)}. Got "
f"{len(args)} arguments instead."
)
# Check each arg is of the proper class (this will also catch a user initializing in the wrong order)
for attribute_name, arg in kwargs.items():
class_name = getattr(self, f"{attribute_name}_class")
# Nothing is ever going to be an instance of "AutoXxx", in that case we check the base class.
class_name = AUTO_TO_BASE_CLASS_MAPPING.get(class_name, class_name)
if isinstance(class_name, tuple):
proper_class = tuple(getattr(transformers_module, n) for n in class_name if n is not None)
else:
proper_class = getattr(transformers_module, class_name)
if not isinstance(arg, proper_class):
raise ValueError(
f"Received a {type(arg).__name__} for argument {attribute_name}, but a {class_name} was expected."
)
setattr(self, attribute_name, arg)
def __repr__(self):
attributes_repr = [f"- {name}: {repr(getattr(self, name))}" for name in self.attributes]
attributes_repr = "\n".join(attributes_repr)
return f"{self.__class__.__name__}:\n{attributes_repr}"
def save_pretrained(self, save_directory):
"""
Saves the attributes of this processor (feature extractor, tokenizer...) in the specified directory so that it
can be reloaded using the [`~ProcessorMixin.from_pretrained`] method.
<Tip>
This class method is simply calling [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] and
[`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
above for more information.
</Tip>
Args:
save_directory (`str` or `os.PathLike`):
Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
be created if it does not exist).
"""
for attribute_name in self.attributes:
attribute = getattr(self, attribute_name)
# Include the processor class in the attribute config so this processor can then be reloaded with the
# `AutoProcessor` API.
if hasattr(attribute, "_set_processor_class"):
attribute._set_processor_class(self.__class__.__name__)
attribute.save_pretrained(save_directory)
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
r"""
Instantiate a processor associated with a pretrained model.
<Tip>
This class method is simply calling the feature extractor
[`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] and the tokenizer
[`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`] methods. Please refer to the docstrings of the
methods above for more information.
</Tip>
Args:
pretrained_model_name_or_path (`str` or `os.PathLike`):
This can be either:
- a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
- a path to a *directory* containing a feature extractor file saved using the
[`~SequenceFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
- a path or url to a saved feature extractor JSON *file*, e.g.,
`./my_model_directory/preprocessor_config.json`.
**kwargs
Additional keyword arguments passed along to both
[`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] and
[`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`].
"""
args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
return cls(*args)
@classmethod
def _get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
args = []
for attribute_name in cls.attributes:
class_name = getattr(cls, f"{attribute_name}_class")
if isinstance(class_name, tuple):
classes = tuple(getattr(transformers_module, n) if n is not None else None for n in class_name)
use_fast = kwargs.get("use_fast", True)
if use_fast and classes[1] is not None:
attribute_class = classes[1]
else:
attribute_class = classes[0]
else:
attribute_class = getattr(transformers_module, class_name)
args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
return args
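To close, a hypothetical subclass (not part of this diff) showing how the pieces above fit together; the class name and commented-out checkpoint id are illustrative only:

```python
from transformers.processing_utils import ProcessorMixin


class MyProcessor(ProcessorMixin):
    # One entry per attribute; the mixin expects a matching "<attr>_class" name for each.
    attributes = ["feature_extractor", "tokenizer"]
    # "Auto" names are mapped to their base classes (FeatureExtractionMixin,
    # PreTrainedTokenizerBase) for the isinstance check in __init__.
    feature_extractor_class = "AutoFeatureExtractor"
    tokenizer_class = "AutoTokenizer"


# from_pretrained instantiates each attribute class and then calls cls(*args);
# save_pretrained loops over the attributes and saves each one.
# processor = MyProcessor.from_pretrained("some/checkpoint")  # illustrative id
# processor.save_pretrained("./my-processor")
```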