"tests/models/codegen/test_tokenization_codegen.py" did not exist on "c824d15aa1590ddb5d2fc977a8a1009a4b1d7262"
Unverified Commit 5f789a68 authored by NielsRogge, committed by GitHub

Add LayoutXLMProcessor (and LayoutXLMTokenizer, LayoutXLMTokenizerFast) (#14115)



* Add LayoutXLMTokenizer and LayoutXLMTokenizerFast

* Fix styling issues

* Fix more styling issues

* Fix more styling issues

* Fix docstring

* Fix unit tests

* Fix docs

* Fix unit tests

* Fix typos and styling issues

* Fix styling issues

* Fix docstring

* Make all tests of test_tokenization_layoutxlm pass

* Add LayoutXLMProcessor

* Make fixup

* Make all LayoutXLMProcessor tests pass

* Minor fixes

* Leave LayoutLMv2Processor tests unchanged

* Fix code quality

* Move LayoutXLM tokenizers and processor to separate folder

* Fix code quality

* Apply suggestions from code review

* Replace assertions by value errors

* Remove methods from fast tokenizer

Co-authored-by: King Yiu Suen <kingyiusuen@gmail.com>
parent 558f8543
...@@ -40,17 +40,45 @@ One can directly plug in the weights of LayoutXLM into a LayoutLMv2 model, like
model = LayoutLMv2Model.from_pretrained('microsoft/layoutxlm-base')
-Note that LayoutXLM requires a different tokenizer, based on :class:`~transformers.XLMRobertaTokenizer`. You can
-initialize it as follows:
+Note that LayoutXLM has its own tokenizer, based on
+:class:`~transformers.LayoutXLMTokenizer`/:class:`~transformers.LayoutXLMTokenizerFast`. You can initialize it as
+follows:
.. code-block::
-    from transformers import AutoTokenizer
-    tokenizer = AutoTokenizer.from_pretrained('microsoft/layoutxlm-base')
+    from transformers import LayoutXLMTokenizer
+    tokenizer = LayoutXLMTokenizer.from_pretrained('microsoft/layoutxlm-base')
Similar to LayoutLMv2, you can use :class:`~transformers.LayoutXLMProcessor` (which internally applies
:class:`~transformers.LayoutLMv2FeatureExtractor` and
:class:`~transformers.LayoutXLMTokenizer`/:class:`~transformers.LayoutXLMTokenizerFast` in sequence) to prepare all
data for the model.
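For instance, preparing a document image looks roughly like this (a minimal sketch: ``document.png`` is a hypothetical scanned document, and the checkpoint is assumed to ship both a feature extractor config and tokenizer files):

.. code-block::

    from PIL import Image
    from transformers import LayoutXLMProcessor

    processor = LayoutXLMProcessor.from_pretrained('microsoft/layoutxlm-base')

    image = Image.open('document.png').convert('RGB')
    # OCR is applied by default; the recognized words and boxes are then tokenized
    encoding = processor(image, return_tensors='pt')
    # encoding holds input_ids, attention_mask, bbox and image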
As LayoutXLM's architecture is equivalent to that of LayoutLMv2, one can refer to :doc:`LayoutLMv2's documentation page
<layoutlmv2>` for all tips, code examples and notebooks.
This model was contributed by `nielsr <https://huggingface.co/nielsr>`__. The original code can be found `here
<https://github.com/microsoft/unilm>`__.
LayoutXLMTokenizer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.LayoutXLMTokenizer
:members: __call__, build_inputs_with_special_tokens, get_special_tokens_mask,
create_token_type_ids_from_sequences, save_vocabulary
LayoutXLMTokenizerFast
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.LayoutXLMTokenizerFast
:members: __call__
LayoutXLMProcessor
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.LayoutXLMProcessor
:members: __call__
...@@ -229,6 +229,7 @@ _import_structure = {
"LayoutLMv2Processor",
"LayoutLMv2Tokenizer",
],
"models.layoutxlm": ["LayoutXLMProcessor"],
"models.led": ["LED_PRETRAINED_CONFIG_ARCHIVE_MAP", "LEDConfig", "LEDTokenizer"],
"models.longformer": ["LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "LongformerConfig", "LongformerTokenizer"],
"models.luke": ["LUKE_PRETRAINED_CONFIG_ARCHIVE_MAP", "LukeConfig", "LukeTokenizer"],
...@@ -365,6 +366,7 @@ if is_sentencepiece_available():
_import_structure["models.big_bird"].append("BigBirdTokenizer")
_import_structure["models.camembert"].append("CamembertTokenizer")
_import_structure["models.deberta_v2"].append("DebertaV2Tokenizer")
_import_structure["models.layoutxlm"].append("LayoutXLMTokenizer")
_import_structure["models.m2m_100"].append("M2M100Tokenizer")
_import_structure["models.marian"].append("MarianTokenizer")
_import_structure["models.mbart"].append("MBartTokenizer")
...@@ -411,6 +413,7 @@ if is_tokenizers_available():
_import_structure["models.herbert"].append("HerbertTokenizerFast")
_import_structure["models.layoutlm"].append("LayoutLMTokenizerFast")
_import_structure["models.layoutlmv2"].append("LayoutLMv2TokenizerFast")
_import_structure["models.layoutxlm"].append("LayoutXLMTokenizerFast")
_import_structure["models.led"].append("LEDTokenizerFast")
_import_structure["models.longformer"].append("LongformerTokenizerFast")
_import_structure["models.lxmert"].append("LxmertTokenizerFast")
...@@ -477,6 +480,7 @@ if is_vision_available():
_import_structure["models.detr"].append("DetrFeatureExtractor")
_import_structure["models.layoutlmv2"].append("LayoutLMv2FeatureExtractor")
_import_structure["models.layoutlmv2"].append("LayoutLMv2Processor")
_import_structure["models.layoutxlm"].append("LayoutXLMProcessor")
_import_structure["models.segformer"].append("SegformerFeatureExtractor")
_import_structure["models.vit"].append("ViTFeatureExtractor")
else:
...@@ -2140,6 +2144,7 @@ if TYPE_CHECKING:
LayoutLMv2Processor,
LayoutLMv2Tokenizer,
)
from .models.layoutxlm import LayoutXLMProcessor
from .models.led import LED_PRETRAINED_CONFIG_ARCHIVE_MAP, LEDConfig, LEDTokenizer
from .models.longformer import LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, LongformerConfig, LongformerTokenizer
from .models.luke import LUKE_PRETRAINED_CONFIG_ARCHIVE_MAP, LukeConfig, LukeTokenizer
...@@ -2266,6 +2271,7 @@ if TYPE_CHECKING:
from .models.big_bird import BigBirdTokenizer
from .models.camembert import CamembertTokenizer
from .models.deberta_v2 import DebertaV2Tokenizer
from .models.layoutxlm import LayoutXLMTokenizer
from .models.m2m_100 import M2M100Tokenizer
from .models.marian import MarianTokenizer
from .models.mbart import MBart50Tokenizer, MBartTokenizer
...@@ -2302,6 +2308,7 @@ if TYPE_CHECKING:
from .models.herbert import HerbertTokenizerFast
from .models.layoutlm import LayoutLMTokenizerFast
from .models.layoutlmv2 import LayoutLMv2TokenizerFast
from .models.layoutxlm import LayoutXLMTokenizerFast
from .models.led import LEDTokenizerFast
from .models.longformer import LongformerTokenizerFast
from .models.lxmert import LxmertTokenizerFast
...@@ -2349,6 +2356,7 @@ if TYPE_CHECKING:
from .models.deit import DeiTFeatureExtractor
from .models.detr import DetrFeatureExtractor
from .models.layoutlmv2 import LayoutLMv2FeatureExtractor, LayoutLMv2Processor
from .models.layoutxlm import LayoutXLMProcessor
from .models.segformer import SegformerFeatureExtractor
from .models.vit import ViTFeatureExtractor
else:
...
...@@ -944,6 +944,7 @@ SLOW_TO_FAST_CONVERTERS = {
"HerbertTokenizer": HerbertConverter,
"LayoutLMTokenizer": BertConverter,
"LayoutLMv2Tokenizer": BertConverter,
"LayoutXLMTokenizer": XLMRobertaConverter,
"LongformerTokenizer": RobertaConverter,
"LEDTokenizer": RobertaConverter,
"LxmertTokenizer": BertConverter,
...
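Mapping "LayoutXLMTokenizer" to XLMRobertaConverter is what lets a fast tokenizer be built from the slow tokenizer's sentencepiece files. A hedged sketch of triggering that conversion explicitly (from_slow=True forces it even when a prebuilt tokenizer.json exists):

    from transformers import LayoutXLMTokenizerFast

    # converts the slow sentencepiece vocabulary through XLMRobertaConverter
    tokenizer = LayoutXLMTokenizerFast.from_pretrained('microsoft/layoutxlm-base', from_slow=True)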
...@@ -59,6 +59,7 @@ from . import (
ibert,
layoutlm,
layoutlmv2,
layoutxlm,
led,
longformer,
luke,
...
...@@ -124,6 +124,7 @@ else:
("lxmert", ("LxmertTokenizer", "LxmertTokenizerFast" if is_tokenizers_available() else None)),
("layoutlm", ("LayoutLMTokenizer", "LayoutLMTokenizerFast" if is_tokenizers_available() else None)),
("layoutlmv2", ("LayoutLMv2Tokenizer", "LayoutLMv2TokenizerFast" if is_tokenizers_available() else None)),
("layoutxlm", ("LayoutXLMTokenizer", "LayoutXLMTokenizerFast" if is_tokenizers_available() else None)),
(
"dpr",
(
...
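With the "layoutxlm" entry registered, AutoTokenizer can resolve checkpoints whose config reports that model type. A minimal sketch (the checkpoint path is hypothetical; the fast class is picked when the tokenizers backend is installed, the slow one otherwise):

    from transformers import AutoTokenizer

    # -> LayoutXLMTokenizerFast if tokenizers is available, else LayoutXLMTokenizer
    tokenizer = AutoTokenizer.from_pretrained('path/to/a-layoutxlm-checkpoint')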
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING
from ...file_utils import (
_LazyModule,
is_sentencepiece_available,
is_tokenizers_available,
is_torch_available,
is_vision_available,
)
_import_structure = {}
if is_sentencepiece_available():
_import_structure["tokenization_layoutxlm"] = ["LayoutXLMTokenizer"]
if is_tokenizers_available():
_import_structure["tokenization_layoutxlm_fast"] = ["LayoutXLMTokenizerFast"]
if is_vision_available():
_import_structure["processing_layoutxlm"] = ["LayoutXLMProcessor"]
if TYPE_CHECKING:
if is_sentencepiece_available():
from .tokenization_layoutxlm import LayoutXLMTokenizer
if is_tokenizers_available():
from .tokenization_layoutxlm_fast import LayoutXLMTokenizerFast
if is_vision_available():
from .processing_layoutxlm import LayoutXLMProcessor
else:
import sys
sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure)
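The guarded _import_structure above means each symbol only becomes importable when its backend is installed; otherwise the dummy objects added further below are exported instead. A rough sketch of checking this from user code (is_sentencepiece_available is the same helper the module itself uses):

    from transformers.file_utils import is_sentencepiece_available

    if is_sentencepiece_available():
        from transformers import LayoutXLMTokenizer  # the real, sentencepiece-backed class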
# coding=utf-8
# Copyright 2021 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Processor class for LayoutXLM.
"""
from typing import List, Optional, Union
from transformers.models.layoutlmv2.feature_extraction_layoutlmv2 import LayoutLMv2FeatureExtractor
from ...file_utils import TensorType
from ...tokenization_utils_base import BatchEncoding, PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
from .tokenization_layoutxlm import LayoutXLMTokenizer
from .tokenization_layoutxlm_fast import LayoutXLMTokenizerFast
class LayoutXLMProcessor:
r"""
Constructs a LayoutXLM processor which combines a LayoutXLM feature extractor and a LayoutXLM tokenizer into a
single processor.
:class:`~transformers.LayoutXLMProcessor` offers all the functionalities you need to prepare data for the model.
It first uses :class:`~transformers.LayoutLMv2FeatureExtractor` to resize document images to a fixed size, and
optionally applies OCR to get words and normalized bounding boxes. These are then provided to
:class:`~transformers.LayoutXLMTokenizer` or :class:`~transformers.LayoutXLMTokenizerFast`, which turns the words
and bounding boxes into token-level :obj:`input_ids`, :obj:`attention_mask`, :obj:`token_type_ids` and :obj:`bbox`.
Optionally, one can provide integer :obj:`word_labels`, which are turned into token-level :obj:`labels` for token
classification tasks (such as FUNSD, CORD).
Args:
feature_extractor (:obj:`LayoutLMv2FeatureExtractor`):
An instance of :class:`~transformers.LayoutLMv2FeatureExtractor`. The feature extractor is a required
input.
tokenizer (:obj:`LayoutXLMTokenizer` or :obj:`LayoutXLMTokenizerFast`):
An instance of :class:`~transformers.LayoutXLMTokenizer` or :class:`~transformers.LayoutXLMTokenizerFast`.
The tokenizer is a required input.
"""
def __init__(self, feature_extractor, tokenizer):
if not isinstance(feature_extractor, LayoutLMv2FeatureExtractor):
raise ValueError(
f"`feature_extractor` has to be of type {LayoutLMv2FeatureExtractor.__name__}, but is {type(feature_extractor)}"
)
if not isinstance(tokenizer, (LayoutXLMTokenizer, LayoutXLMTokenizerFast)):
raise ValueError(
f"`tokenizer` has to be of type {LayoutXLMTokenizer.__name__} or {LayoutXLMTokenizerFast.__name__}, but is {type(tokenizer)}"
)
self.feature_extractor = feature_extractor
self.tokenizer = tokenizer
def save_pretrained(self, save_directory):
"""
Save a LayoutXLM feature_extractor object and LayoutXLM tokenizer object to the directory ``save_directory``,
so that it can be re-loaded using the :func:`~transformers.LayoutXLMProcessor.from_pretrained` class method.
.. note::
This class method is simply calling
:meth:`~transformers.feature_extraction_utils.FeatureExtractionMixin.save_pretrained` and
:meth:`~transformers.tokenization_utils_base.PreTrainedTokenizer.save_pretrained`. Please refer to the
docstrings of the methods above for more information.
Args:
save_directory (:obj:`str` or :obj:`os.PathLike`):
Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
be created if it does not exist).
"""
self.feature_extractor.save_pretrained(save_directory)
self.tokenizer.save_pretrained(save_directory)
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, use_fast=True, **kwargs):
r"""
Instantiate a :class:`~transformers.LayoutXLMProcessor` from a pretrained LayoutXLM processor.
.. note::
This class method is simply calling LayoutLMv2FeatureExtractor's
:meth:`~transformers.feature_extraction_utils.FeatureExtractionMixin.from_pretrained` and
LayoutXLMTokenizer's/LayoutXLMTokenizerFast's :meth:`~transformers.tokenization_utils_base.PreTrainedTokenizer.from_pretrained`.
Please refer to the docstrings of the methods above for more information.
Args:
pretrained_model_name_or_path (:obj:`str` or :obj:`os.PathLike`):
This can be either:
- a string, the `model id` of a pretrained feature_extractor hosted inside a model repo on
huggingface.co. Valid model ids can be located at the root-level, like ``bert-base-uncased``, or
namespaced under a user or organization name, like ``dbmdz/bert-base-german-cased``.
- a path to a `directory` containing a feature extractor file saved using the
:meth:`~transformers.SequenceFeatureExtractor.save_pretrained` method, e.g.,
``./my_model_directory/``.
- a path or url to a saved feature extractor JSON `file`, e.g.,
``./my_model_directory/preprocessor_config.json``.
use_fast (:obj:`bool`, `optional`, defaults to :obj:`True`):
Whether or not to instantiate a fast tokenizer.
**kwargs
Additional keyword arguments passed along to both :class:`~transformers.SequenceFeatureExtractor` and
:class:`~transformers.PreTrainedTokenizer`
"""
feature_extractor = LayoutLMv2FeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
if use_fast:
tokenizer = LayoutXLMTokenizerFast.from_pretrained(pretrained_model_name_or_path, **kwargs)
else:
tokenizer = LayoutXLMTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
def __call__(
self,
images,
text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None,
boxes: Union[List[List[int]], List[List[List[int]]]] = None,
word_labels: Optional[Union[List[int], List[List[int]]]] = None,
add_special_tokens: bool = True,
padding: Union[bool, str, PaddingStrategy] = False,
truncation: Union[bool, str, TruncationStrategy] = False,
max_length: Optional[int] = None,
stride: int = 0,
pad_to_multiple_of: Optional[int] = None,
return_token_type_ids: Optional[bool] = None,
return_attention_mask: Optional[bool] = None,
return_overflowing_tokens: bool = False,
return_special_tokens_mask: bool = False,
return_offsets_mapping: bool = False,
return_length: bool = False,
verbose: bool = True,
return_tensors: Optional[Union[str, TensorType]] = None,
**kwargs
) -> BatchEncoding:
"""
This method first forwards the :obj:`images` argument to
:meth:`~transformers.LayoutLMv2FeatureExtractor.__call__`. In case
:class:`~transformers.LayoutLMv2FeatureExtractor` was initialized with :obj:`apply_ocr` set to ``True``, it passes
the obtained words and bounding boxes along with the additional arguments to
:meth:`~transformers.LayoutXLMTokenizer.__call__` and returns the output, together with resized :obj:`images`. In
case :class:`~transformers.LayoutLMv2FeatureExtractor` was initialized with :obj:`apply_ocr`
set to ``False``, it passes the words (:obj:`text`/:obj:`text_pair`) and :obj:`boxes` specified by the user
along with the additional arguments to :meth:`~transformers.LayoutXLMTokenizer.__call__` and returns the
output, together with resized :obj:`images`.
Please refer to the docstring of the above two methods for more information.
"""
# verify input
if self.feature_extractor.apply_ocr and (boxes is not None):
raise ValueError(
"You cannot provide bounding boxes "
"if you initialized the feature extractor with apply_ocr set to True."
)
if self.feature_extractor.apply_ocr and (word_labels is not None):
raise ValueError(
"You cannot provide word labels "
"if you initialized the feature extractor with apply_ocr set to True."
)
# first, apply the feature extractor
features = self.feature_extractor(images=images, return_tensors=return_tensors)
# second, apply the tokenizer
if text is not None and self.feature_extractor.apply_ocr and text_pair is None:
if isinstance(text, str):
text = [text] # add batch dimension (as the feature extractor always adds a batch dimension)
text_pair = features["words"]
encoded_inputs = self.tokenizer(
text=text if text is not None else features["words"],
text_pair=text_pair if text_pair is not None else None,
boxes=boxes if boxes is not None else features["boxes"],
word_labels=word_labels,
add_special_tokens=add_special_tokens,
padding=padding,
truncation=truncation,
max_length=max_length,
stride=stride,
pad_to_multiple_of=pad_to_multiple_of,
return_token_type_ids=return_token_type_ids,
return_attention_mask=return_attention_mask,
return_overflowing_tokens=return_overflowing_tokens,
return_special_tokens_mask=return_special_tokens_mask,
return_offsets_mapping=return_offsets_mapping,
return_length=return_length,
verbose=verbose,
return_tensors=return_tensors,
**kwargs,
)
# add pixel values
encoded_inputs["image"] = features.pop("pixel_values")
return encoded_inputs
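For the second branch of __call__, where the caller supplies the words and boxes, usage looks roughly like this (a hedged sketch: the words and the 0-1000-normalized boxes are made-up values, and the feature extractor must be created with apply_ocr=False):

    from PIL import Image
    from transformers import LayoutLMv2FeatureExtractor, LayoutXLMProcessor, LayoutXLMTokenizer

    feature_extractor = LayoutLMv2FeatureExtractor(apply_ocr=False)
    tokenizer = LayoutXLMTokenizer.from_pretrained('microsoft/layoutxlm-base')
    processor = LayoutXLMProcessor(feature_extractor, tokenizer)

    image = Image.open('document.png').convert('RGB')  # hypothetical scanned document
    words = ['hello', 'world']
    boxes = [[1, 8, 12, 20], [14, 8, 30, 20]]  # one box per word, 0-1000 scale
    encoding = processor(image, text=words, boxes=boxes, return_tensors='pt')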
This diff is collapsed.
...@@ -65,6 +65,15 @@ class DebertaV2Tokenizer:
requires_backends(cls, ["sentencepiece"])
class LayoutXLMTokenizer:
def __init__(self, *args, **kwargs):
requires_backends(self, ["sentencepiece"])
@classmethod
def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["sentencepiece"])
class M2M100Tokenizer:
def __init__(self, *args, **kwargs):
requires_backends(self, ["sentencepiece"])
...
...@@ -200,6 +200,15 @@ class LayoutLMv2TokenizerFast:
requires_backends(cls, ["tokenizers"])
class LayoutXLMTokenizerFast:
def __init__(self, *args, **kwargs):
requires_backends(self, ["tokenizers"])
@classmethod
def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["tokenizers"])
class LEDTokenizerFast:
def __init__(self, *args, **kwargs):
requires_backends(self, ["tokenizers"])
...
...@@ -50,6 +50,15 @@ class LayoutLMv2Processor:
requires_backends(cls, ["vision"])
class LayoutXLMProcessor:
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
@classmethod
def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["vision"])
class SegformerFeatureExtractor:
def __init__(self, *args, **kwargs):
requires_backends(self, ["vision"])
...
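These dummy classes keep the top-level imports working in environments without the optional backends; any actual use then fails fast with an installation hint. A behavior sketch (not the exact error message):

    from transformers import LayoutXLMProcessor  # the dummy class if vision is missing

    processor = LayoutXLMProcessor()  # raises, asking to install the vision backend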
This diff is collapsed.
This diff is collapsed.