"docs/source/en/main_classes/processors.md" did not exist on "b8112eddecfd524038e3c10970c06a444a32aa9d"
Unverified Commit 9342c8fb authored by Sylvain Gugger's avatar Sylvain Gugger Committed by GitHub
Browse files

Deprecate models (#24787)



* Deprecate some models

* Fix imports

* Fix inits too

* Remove tests

* Add deprecated banner to documentation

* Remove from init

* Fix auto classes

* Style

* Remove upgrade strategy 1

* Remove site package cache

* Revert this part

* Fix typo...

* Update utils

* Update docs/source/en/model_doc/bort.md
Co-authored-by: Lysandre Debut <lysandre.debut@reseau.eseo.fr>

* Address review comments

* With all files saved

---------
Co-authored-by: Lysandre Debut <lysandre.debut@reseau.eseo.fr>
parent 717dadc6
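
What "deprecating" means mechanically here: the affected model code (MCTCT, RetriBERT, TAPEX, TrajectoryTransformer, VAN, among others) moves under the `transformers.models.deprecated` namespace, and the repository's consistency scripts learn to skip that namespace (see the diff hunks at the end of this page). A minimal sketch of how the move is visible from Python; the exact module path is an assumption inferred from those hunks, not quoted from the commit:

from transformers import MCTCTFeatureExtractor  # top-level imports keep working

# After this commit the class is defined under models.deprecated:
print(MCTCTFeatureExtractor.__module__)
# expected to be something like 'transformers.models.deprecated.mctct.feature_extraction_mctct'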
# coding=utf-8
# Copyright 2022 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import random
import unittest
import numpy as np
from transformers import MCTCTFeatureExtractor
from transformers.testing_utils import require_torch
from ...test_sequence_feature_extraction_common import SequenceFeatureExtractionTestMixin
global_rng = random.Random()
def floats_list(shape, scale=1.0, rng=None, name=None):
"""Creates a random float32 tensor"""
if rng is None:
rng = global_rng
values = []
for _batch_idx in range(shape[0]):
values.append([])
for _ in range(shape[1]):
values[-1].append(rng.random() * scale)
return values
@require_torch
class MCTCTFeatureExtractionTester(unittest.TestCase):
def __init__(
self,
parent,
batch_size=7,
min_seq_length=400,
max_seq_length=2000,
feature_size=24,
num_mel_bins=24,
padding_value=0.0,
sampling_rate=16_000,
return_attention_mask=True,
do_normalize=True,
):
self.parent = parent
self.batch_size = batch_size
self.min_seq_length = min_seq_length
self.max_seq_length = max_seq_length
self.seq_length_diff = (self.max_seq_length - self.min_seq_length) // (self.batch_size - 1)
self.feature_size = feature_size
self.num_mel_bins = num_mel_bins
self.padding_value = padding_value
self.sampling_rate = sampling_rate
self.return_attention_mask = return_attention_mask
self.do_normalize = do_normalize
def prepare_feat_extract_dict(self):
return {
"feature_size": self.feature_size,
"num_mel_bins": self.num_mel_bins,
"padding_value": self.padding_value,
"sampling_rate": self.sampling_rate,
"return_attention_mask": self.return_attention_mask,
"do_normalize": self.do_normalize,
}
def prepare_inputs_for_common(self, equal_length=False, numpify=False):
def _flatten(list_of_lists):
return list(itertools.chain(*list_of_lists))
if equal_length:
speech_inputs = [floats_list((self.max_seq_length, self.feature_size)) for _ in range(self.batch_size)]
else:
# make sure that inputs increase in size
speech_inputs = [
floats_list((x, self.feature_size))
for x in range(self.min_seq_length, self.max_seq_length, self.seq_length_diff)
]
if numpify:
speech_inputs = [np.asarray(x) for x in speech_inputs]
return speech_inputs
@require_torch
class MCTCTFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.TestCase):
feature_extraction_class = MCTCTFeatureExtractor
def setUp(self):
self.feat_extract_tester = MCTCTFeatureExtractionTester(self)
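    # Cepstral mean and variance normalization (CMVN) should leave every valid
    # (non-padded) stretch of features with roughly zero mean and unit variance;
    # the helper below asserts exactly that and backs the normalization tests.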
def _check_zero_mean_unit_variance(self, input_vector):
        self.assertTrue(np.all(np.abs(np.mean(input_vector)) < 1e-3))
self.assertTrue(np.all(np.abs(np.var(input_vector) - 1) < 1e-3))
def test_call(self):
        # Test that the feature extractor returns consistent results for batched and non-batched inputs
feature_extractor = self.feature_extraction_class(**self.feat_extract_tester.prepare_feat_extract_dict())
# create three inputs of length 800, 1000, and 1200
speech_inputs = [floats_list((1, x))[0] for x in range(800, 1400, 200)]
np_speech_inputs = [np.asarray(speech_input) for speech_input in speech_inputs]
# Test feature size
input_features = feature_extractor(np_speech_inputs, padding=True, return_tensors="np").input_features
self.assertTrue(input_features.ndim == 3)
self.assertTrue(input_features.shape[-1] == feature_extractor.feature_size)
# Test not batched input
encoded_sequences_1 = feature_extractor(speech_inputs[0], return_tensors="np").input_features
encoded_sequences_2 = feature_extractor(np_speech_inputs[0], return_tensors="np").input_features
self.assertTrue(np.allclose(encoded_sequences_1, encoded_sequences_2, atol=1e-3))
# Test batched
encoded_sequences_1 = feature_extractor(speech_inputs, return_tensors="np").input_features
encoded_sequences_2 = feature_extractor(np_speech_inputs, return_tensors="np").input_features
for enc_seq_1, enc_seq_2 in zip(encoded_sequences_1, encoded_sequences_2):
self.assertTrue(np.allclose(enc_seq_1, enc_seq_2, atol=1e-3))
# Test 2-D numpy arrays are batched.
speech_inputs = [floats_list((1, x))[0] for x in (800, 800, 800)]
np_speech_inputs = np.asarray(speech_inputs)
encoded_sequences_1 = feature_extractor(speech_inputs, return_tensors="np").input_features
encoded_sequences_2 = feature_extractor(np_speech_inputs, return_tensors="np").input_features
for enc_seq_1, enc_seq_2 in zip(encoded_sequences_1, encoded_sequences_2):
self.assertTrue(np.allclose(enc_seq_1, enc_seq_2, atol=1e-3))
def test_cepstral_mean_and_variance_normalization(self):
feature_extractor = self.feature_extraction_class(**self.feat_extract_tester.prepare_feat_extract_dict())
speech_inputs = [floats_list((1, x))[0] for x in range(8000, 14000, 2000)]
paddings = ["longest", "max_length", "do_not_pad"]
max_lengths = [None, 16, None]
for max_length, padding in zip(max_lengths, paddings):
inputs = feature_extractor(
speech_inputs,
padding=padding,
max_length=max_length,
return_attention_mask=True,
truncation=max_length is not None, # reference to #16419
)
input_features = inputs.input_features
attention_mask = inputs.attention_mask
fbank_feat_lengths = [np.sum(x) for x in attention_mask]
self._check_zero_mean_unit_variance(input_features[0][: fbank_feat_lengths[0]])
self._check_zero_mean_unit_variance(input_features[1][: fbank_feat_lengths[1]])
self._check_zero_mean_unit_variance(input_features[2][: fbank_feat_lengths[2]])
def test_cepstral_mean_and_variance_normalization_np(self):
feature_extractor = self.feature_extraction_class(**self.feat_extract_tester.prepare_feat_extract_dict())
speech_inputs = [floats_list((1, x))[0] for x in range(8000, 14000, 2000)]
paddings = ["longest", "max_length", "do_not_pad"]
max_lengths = [None, 16, None]
for max_length, padding in zip(max_lengths, paddings):
inputs = feature_extractor(
speech_inputs,
max_length=max_length,
padding=padding,
return_tensors="np",
return_attention_mask=True,
truncation=max_length is not None,
)
input_features = inputs.input_features
attention_mask = inputs.attention_mask
fbank_feat_lengths = [np.sum(x) for x in attention_mask]
self._check_zero_mean_unit_variance(input_features[0][: fbank_feat_lengths[0]])
self.assertTrue(input_features[0][fbank_feat_lengths[0] :].sum() < 1e-6)
self._check_zero_mean_unit_variance(input_features[1][: fbank_feat_lengths[1]])
        self.assertTrue(input_features[1][fbank_feat_lengths[1] :].sum() < 1e-6)
self._check_zero_mean_unit_variance(input_features[2][: fbank_feat_lengths[2]])
def test_cepstral_mean_and_variance_normalization_trunc_max_length(self):
feature_extractor = self.feature_extraction_class(**self.feat_extract_tester.prepare_feat_extract_dict())
speech_inputs = [floats_list((1, x))[0] for x in range(8000, 14000, 2000)]
inputs = feature_extractor(
speech_inputs,
padding="max_length",
max_length=4,
truncation=True,
return_tensors="np",
return_attention_mask=True,
)
input_features = inputs.input_features
attention_mask = inputs.attention_mask
fbank_feat_lengths = np.sum(attention_mask == 1, axis=1)
self._check_zero_mean_unit_variance(input_features[0, : fbank_feat_lengths[0]])
self._check_zero_mean_unit_variance(input_features[1])
self._check_zero_mean_unit_variance(input_features[2])
def test_cepstral_mean_and_variance_normalization_trunc_longest(self):
feature_extractor = self.feature_extraction_class(**self.feat_extract_tester.prepare_feat_extract_dict())
speech_inputs = [floats_list((1, x))[0] for x in range(8000, 14000, 2000)]
inputs = feature_extractor(
speech_inputs,
padding="longest",
max_length=4,
truncation=True,
return_tensors="np",
return_attention_mask=True,
)
input_features = inputs.input_features
attention_mask = inputs.attention_mask
fbank_feat_lengths = np.sum(attention_mask == 1, axis=1)
self._check_zero_mean_unit_variance(input_features[0, : fbank_feat_lengths[0]])
self._check_zero_mean_unit_variance(input_features[1, : fbank_feat_lengths[1]])
self._check_zero_mean_unit_variance(input_features[2])
        # make sure that if max_length < longest -> then truncate to max_length
self.assertEqual(input_features.shape, (3, 4, 24))
speech_inputs = [floats_list((1, x))[0] for x in range(8000, 14000, 2000)]
inputs = feature_extractor(
speech_inputs,
padding="longest",
max_length=16,
truncation=True,
return_tensors="np",
return_attention_mask=True,
)
input_features = inputs.input_features
attention_mask = inputs.attention_mask
fbank_feat_lengths = np.sum(attention_mask == 1, axis=1)
self._check_zero_mean_unit_variance(input_features[0, : fbank_feat_lengths[0]])
self._check_zero_mean_unit_variance(input_features[1, : fbank_feat_lengths[1]])
self._check_zero_mean_unit_variance(input_features[2])
        # make sure that if max_length < longest -> then truncate to max_length
self.assertEqual(input_features.shape, (3, 16, 24))
def test_double_precision_pad(self):
import torch
feature_extractor = self.feature_extraction_class(**self.feat_extract_tester.prepare_feat_extract_dict())
np_speech_inputs = np.random.rand(100, 32).astype(np.float64)
py_speech_inputs = np_speech_inputs.tolist()
for inputs in [py_speech_inputs, np_speech_inputs]:
np_processed = feature_extractor.pad([{"input_features": inputs}], return_tensors="np")
self.assertTrue(np_processed.input_features.dtype == np.float32)
pt_processed = feature_extractor.pad([{"input_features": inputs}], return_tensors="pt")
self.assertTrue(pt_processed.input_features.dtype == torch.float32)
def test_different_window(self):
import torch
init_dict = self.feat_extract_tester.prepare_feat_extract_dict()
init_dict["win_function"] = "hann_window"
feature_extractor = self.feature_extraction_class(**init_dict)
np_speech_inputs = np.random.rand(100, 32).astype(np.float64)
py_speech_inputs = np_speech_inputs.tolist()
for inputs in [py_speech_inputs, np_speech_inputs]:
np_processed = feature_extractor.pad([{"input_features": inputs}], return_tensors="np")
self.assertTrue(np_processed.input_features.dtype == np.float32)
pt_processed = feature_extractor.pad([{"input_features": inputs}], return_tensors="pt")
self.assertTrue(pt_processed.input_features.dtype == torch.float32)
def _load_datasamples(self, num_samples):
from datasets import load_dataset
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
# automatic decoding with librispeech
speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
return [x["array"] for x in speech_samples]
def test_integration(self):
# fmt: off
expected = np.array([
[
1.1280, 1.1319, 1.2744, 1.4369, 1.4328, 1.3671, 1.2889, 1.3046,
1.4419, 0.8387, 0.2995, 0.0404, 0.1068, 0.0472, 0.3728, 1.3356,
1.4491, 0.4770, 0.3997, 0.2776, 0.3184, -0.1243, -0.1170, -0.0828
],
[
1.0826, 1.0565, 1.2110, 1.3886, 1.3416, 1.2009, 1.1894, 1.2707,
1.5153, 0.7005, 0.4916, 0.4017, 0.3743, 0.1935, 0.4228, 1.1084,
0.9768, 0.0608, 0.2044, 0.1723, 0.0433, -0.2360, -0.2478, -0.2643
],
[
1.0590, 0.9923, 1.1185, 1.3309, 1.1971, 1.0067, 1.0080, 1.2036,
1.5397, 1.0383, 0.7672, 0.7551, 0.4878, 0.8771, 0.7565, 0.8775,
0.9042, 0.4595, 0.6157, 0.4954, 0.1857, 0.0307, 0.0199, 0.1033
],
])
# fmt: on
input_speech = self._load_datasamples(1)
feature_extractor = self.feature_extraction_class(**self.feat_extract_tester.prepare_feat_extract_dict())
input_features = feature_extractor(input_speech, sampling_rate=16000, return_tensors="pt").input_features
self.assertTrue(np.allclose(input_features[0, 100:103], expected, atol=1e-4))
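
For orientation, the calling convention these tests exercise condenses to the sketch below; the checkpoint name is an assumption (the M-CTC-T checkpoint used elsewhere in the repo), not something this file pins down:

import numpy as np
from transformers import MCTCTFeatureExtractor

extractor = MCTCTFeatureExtractor.from_pretrained("speechbrain/m-ctc-t-large")  # assumed checkpoint
audio = np.random.randn(16_000).astype(np.float32)  # one second of fake 16 kHz audio
features = extractor(audio, sampling_rate=16_000, padding=True, return_tensors="np")
print(features.input_features.shape)  # (1, num_frames, feature_size)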
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import shutil
import tempfile
import unittest
from transformers import MCTCTProcessor, is_speech_available, is_torch_available
from transformers.file_utils import FEATURE_EXTRACTOR_NAME
from transformers.models.wav2vec2.tokenization_wav2vec2 import VOCAB_FILES_NAMES, Wav2Vec2CTCTokenizer
from transformers.testing_utils import require_torch, require_torchaudio
if is_speech_available() and is_torch_available():
from transformers import MCTCTFeatureExtractor
from .test_feature_extraction_mctct import floats_list
@require_torch
@require_torchaudio
class MCTCTProcessorTest(unittest.TestCase):
def setUp(self):
vocab = "<pad> <s> </s> <unk> | E T A O N I H S R D L U M W C F G Y P B V K ' X J Q Z".split(" ")
vocab_tokens = dict(zip(vocab, range(len(vocab))))
self.add_kwargs_tokens_map = {
"pad_token": "<pad>",
"unk_token": "<unk>",
"bos_token": "<s>",
"eos_token": "</s>",
}
feature_extractor_map = {
"feature_size": 1,
"padding_value": 0.0,
"sampling_rate": 16000,
"return_attention_mask": False,
"do_normalize": True,
}
self.tmpdirname = tempfile.mkdtemp()
self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["vocab_file"])
self.feature_extraction_file = os.path.join(self.tmpdirname, FEATURE_EXTRACTOR_NAME)
with open(self.vocab_file, "w", encoding="utf-8") as fp:
fp.write(json.dumps(vocab_tokens) + "\n")
with open(self.feature_extraction_file, "w", encoding="utf-8") as fp:
fp.write(json.dumps(feature_extractor_map) + "\n")
def get_tokenizer(self, **kwargs_init):
kwargs = self.add_kwargs_tokens_map.copy()
kwargs.update(kwargs_init)
return Wav2Vec2CTCTokenizer.from_pretrained(self.tmpdirname, **kwargs)
def get_feature_extractor(self, **kwargs):
return MCTCTFeatureExtractor.from_pretrained(self.tmpdirname, **kwargs)
def tearDown(self):
shutil.rmtree(self.tmpdirname)
def test_save_load_pretrained_default(self):
tokenizer = self.get_tokenizer()
feature_extractor = self.get_feature_extractor()
processor = MCTCTProcessor(tokenizer=tokenizer, feature_extractor=feature_extractor)
processor.save_pretrained(self.tmpdirname)
processor = MCTCTProcessor.from_pretrained(self.tmpdirname)
self.assertEqual(processor.tokenizer.get_vocab(), tokenizer.get_vocab())
self.assertIsInstance(processor.tokenizer, Wav2Vec2CTCTokenizer)
self.assertEqual(processor.feature_extractor.to_json_string(), feature_extractor.to_json_string())
self.assertIsInstance(processor.feature_extractor, MCTCTFeatureExtractor)
def test_save_load_pretrained_additional_features(self):
processor = MCTCTProcessor(tokenizer=self.get_tokenizer(), feature_extractor=self.get_feature_extractor())
processor.save_pretrained(self.tmpdirname)
tokenizer_add_kwargs = self.get_tokenizer(bos_token="(BOS)", eos_token="(EOS)")
feature_extractor_add_kwargs = self.get_feature_extractor(do_normalize=False, padding_value=1.0)
processor = MCTCTProcessor.from_pretrained(
self.tmpdirname, bos_token="(BOS)", eos_token="(EOS)", do_normalize=False, padding_value=1.0
)
self.assertEqual(processor.tokenizer.get_vocab(), tokenizer_add_kwargs.get_vocab())
self.assertIsInstance(processor.tokenizer, Wav2Vec2CTCTokenizer)
self.assertEqual(processor.feature_extractor.to_json_string(), feature_extractor_add_kwargs.to_json_string())
self.assertIsInstance(processor.feature_extractor, MCTCTFeatureExtractor)
def test_feature_extractor(self):
feature_extractor = self.get_feature_extractor()
tokenizer = self.get_tokenizer()
processor = MCTCTProcessor(tokenizer=tokenizer, feature_extractor=feature_extractor)
raw_speech = floats_list((3, 1000))
input_feat_extract = feature_extractor(raw_speech, return_tensors="np")
input_processor = processor(raw_speech, return_tensors="np")
for key in input_feat_extract.keys():
self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2)
def test_tokenizer(self):
feature_extractor = self.get_feature_extractor()
tokenizer = self.get_tokenizer()
processor = MCTCTProcessor(tokenizer=tokenizer, feature_extractor=feature_extractor)
input_str = "This is a test string"
encoded_processor = processor(text=input_str)
encoded_tok = tokenizer(input_str)
for key in encoded_tok.keys():
self.assertListEqual(encoded_tok[key], encoded_processor[key])
def test_tokenizer_decode(self):
feature_extractor = self.get_feature_extractor()
tokenizer = self.get_tokenizer()
processor = MCTCTProcessor(tokenizer=tokenizer, feature_extractor=feature_extractor)
predicted_ids = [[1, 4, 5, 8, 1, 0, 8], [3, 4, 3, 1, 1, 8, 9]]
decoded_processor = processor.batch_decode(predicted_ids)
decoded_tok = tokenizer.batch_decode(predicted_ids)
self.assertListEqual(decoded_tok, decoded_processor)
def test_model_input_names(self):
feature_extractor = self.get_feature_extractor()
tokenizer = self.get_tokenizer()
processor = MCTCTProcessor(tokenizer=tokenizer, feature_extractor=feature_extractor)
self.assertListEqual(
processor.model_input_names,
feature_extractor.model_input_names,
msg="`processor` and `feature_extractor` model input names do not match",
)
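
Taken together, these tests pin down the processor contract: audio is routed to the feature extractor, text to the tokenizer, and decoding delegates to the tokenizer. A minimal sketch, assuming a directory previously produced by `processor.save_pretrained(...)` (the path is hypothetical):

import numpy as np
from transformers import MCTCTProcessor

processor = MCTCTProcessor.from_pretrained("./saved_mctct_processor")  # hypothetical local path
raw_audio = np.random.randn(16_000).astype(np.float32)
audio_inputs = processor(raw_audio, return_tensors="np")       # feature-extractor route
text_inputs = processor(text="This is a test string")          # tokenizer route
transcripts = processor.batch_decode([[1, 4, 5, 8, 1, 0, 8]])  # delegates to tokenizer.batch_decode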
# coding=utf-8
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Testing suite for the RetriBERT tokenizer. """
import os
import unittest
from transformers import RetriBertTokenizer, RetriBertTokenizerFast
from transformers.models.bert.tokenization_bert import (
VOCAB_FILES_NAMES,
BasicTokenizer,
WordpieceTokenizer,
_is_control,
_is_punctuation,
_is_whitespace,
)
from transformers.testing_utils import require_tokenizers, require_torch, slow
from ...test_tokenization_common import TokenizerTesterMixin, filter_non_english, merge_model_tokenizer_mappings
# Copied from tests.models.bert.test_tokenization_bert with Bert->RetriBert
@require_tokenizers
class RetriBertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
tokenizer_class = RetriBertTokenizer
test_slow_tokenizer = True
rust_tokenizer_class = RetriBertTokenizerFast
test_rust_tokenizer = True
space_between_special_tokens = True
from_pretrained_filter = filter_non_english
def setUp(self):
super().setUp()
vocab_tokens = [
"[UNK]",
"[CLS]",
"[SEP]",
"[PAD]",
"[MASK]",
"want",
"##want",
"##ed",
"wa",
"un",
"runn",
"##ing",
",",
"low",
"lowest",
]
self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["vocab_file"])
with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))
def get_input_output_texts(self, tokenizer):
input_text = "UNwant\u00E9d,running"
output_text = "unwanted, running"
return input_text, output_text
def test_full_tokenizer(self):
tokenizer = self.tokenizer_class(self.vocab_file)
tokens = tokenizer.tokenize("UNwant\u00E9d,running")
self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"])
self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [9, 6, 7, 12, 10, 11])
def test_rust_and_python_full_tokenizers(self):
if not self.test_rust_tokenizer:
return
tokenizer = self.get_tokenizer()
rust_tokenizer = self.get_rust_tokenizer()
sequence = "UNwant\u00E9d,running"
tokens = tokenizer.tokenize(sequence)
rust_tokens = rust_tokenizer.tokenize(sequence)
self.assertListEqual(tokens, rust_tokens)
ids = tokenizer.encode(sequence, add_special_tokens=False)
rust_ids = rust_tokenizer.encode(sequence, add_special_tokens=False)
self.assertListEqual(ids, rust_ids)
rust_tokenizer = self.get_rust_tokenizer()
ids = tokenizer.encode(sequence)
rust_ids = rust_tokenizer.encode(sequence)
self.assertListEqual(ids, rust_ids)
# With lower casing
tokenizer = self.get_tokenizer(do_lower_case=True)
rust_tokenizer = self.get_rust_tokenizer(do_lower_case=True)
sequence = "UNwant\u00E9d,running"
tokens = tokenizer.tokenize(sequence)
rust_tokens = rust_tokenizer.tokenize(sequence)
self.assertListEqual(tokens, rust_tokens)
ids = tokenizer.encode(sequence, add_special_tokens=False)
rust_ids = rust_tokenizer.encode(sequence, add_special_tokens=False)
self.assertListEqual(ids, rust_ids)
rust_tokenizer = self.get_rust_tokenizer()
ids = tokenizer.encode(sequence)
rust_ids = rust_tokenizer.encode(sequence)
self.assertListEqual(ids, rust_ids)
def test_chinese(self):
tokenizer = BasicTokenizer()
self.assertListEqual(tokenizer.tokenize("ah\u535A\u63A8zz"), ["ah", "\u535A", "\u63A8", "zz"])
def test_basic_tokenizer_lower(self):
tokenizer = BasicTokenizer(do_lower_case=True)
self.assertListEqual(
tokenizer.tokenize(" \tHeLLo!how \n Are yoU? "), ["hello", "!", "how", "are", "you", "?"]
)
self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"])
def test_basic_tokenizer_lower_strip_accents_false(self):
tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=False)
self.assertListEqual(
tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hällo", "!", "how", "are", "you", "?"]
)
self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["h\u00E9llo"])
def test_basic_tokenizer_lower_strip_accents_true(self):
tokenizer = BasicTokenizer(do_lower_case=True, strip_accents=True)
self.assertListEqual(
tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"]
)
self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"])
def test_basic_tokenizer_lower_strip_accents_default(self):
tokenizer = BasicTokenizer(do_lower_case=True)
self.assertListEqual(
tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["hallo", "!", "how", "are", "you", "?"]
)
self.assertListEqual(tokenizer.tokenize("H\u00E9llo"), ["hello"])
def test_basic_tokenizer_no_lower(self):
tokenizer = BasicTokenizer(do_lower_case=False)
self.assertListEqual(
tokenizer.tokenize(" \tHeLLo!how \n Are yoU? "), ["HeLLo", "!", "how", "Are", "yoU", "?"]
)
def test_basic_tokenizer_no_lower_strip_accents_false(self):
tokenizer = BasicTokenizer(do_lower_case=False, strip_accents=False)
self.assertListEqual(
tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["HäLLo", "!", "how", "Are", "yoU", "?"]
)
def test_basic_tokenizer_no_lower_strip_accents_true(self):
tokenizer = BasicTokenizer(do_lower_case=False, strip_accents=True)
self.assertListEqual(
tokenizer.tokenize(" \tHäLLo!how \n Are yoU? "), ["HaLLo", "!", "how", "Are", "yoU", "?"]
)
def test_basic_tokenizer_respects_never_split_tokens(self):
tokenizer = BasicTokenizer(do_lower_case=False, never_split=["[UNK]"])
self.assertListEqual(
tokenizer.tokenize(" \tHeLLo!how \n Are yoU? [UNK]"), ["HeLLo", "!", "how", "Are", "yoU", "?", "[UNK]"]
)
def test_wordpiece_tokenizer(self):
vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn", "##ing"]
vocab = {}
for i, token in enumerate(vocab_tokens):
vocab[token] = i
tokenizer = WordpieceTokenizer(vocab=vocab, unk_token="[UNK]")
self.assertListEqual(tokenizer.tokenize(""), [])
self.assertListEqual(tokenizer.tokenize("unwanted running"), ["un", "##want", "##ed", "runn", "##ing"])
self.assertListEqual(tokenizer.tokenize("unwantedX running"), ["[UNK]", "runn", "##ing"])
def test_is_whitespace(self):
self.assertTrue(_is_whitespace(" "))
self.assertTrue(_is_whitespace("\t"))
self.assertTrue(_is_whitespace("\r"))
self.assertTrue(_is_whitespace("\n"))
self.assertTrue(_is_whitespace("\u00A0"))
self.assertFalse(_is_whitespace("A"))
self.assertFalse(_is_whitespace("-"))
def test_is_control(self):
self.assertTrue(_is_control("\u0005"))
self.assertFalse(_is_control("A"))
self.assertFalse(_is_control(" "))
self.assertFalse(_is_control("\t"))
self.assertFalse(_is_control("\r"))
def test_is_punctuation(self):
self.assertTrue(_is_punctuation("-"))
self.assertTrue(_is_punctuation("$"))
self.assertTrue(_is_punctuation("`"))
self.assertTrue(_is_punctuation("."))
self.assertFalse(_is_punctuation("A"))
self.assertFalse(_is_punctuation(" "))
def test_clean_text(self):
tokenizer = self.get_tokenizer()
rust_tokenizer = self.get_rust_tokenizer()
# Example taken from the issue https://github.com/huggingface/tokenizers/issues/340
self.assertListEqual([tokenizer.tokenize(t) for t in ["Test", "\xad", "test"]], [["[UNK]"], [], ["[UNK]"]])
self.assertListEqual(
[rust_tokenizer.tokenize(t) for t in ["Test", "\xad", "test"]], [["[UNK]"], [], ["[UNK]"]]
)
@slow
def test_sequence_builders(self):
tokenizer = self.tokenizer_class.from_pretrained("yjernite/retribert-base-uncased")
text = tokenizer.encode("sequence builders", add_special_tokens=False)
text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False)
encoded_sentence = tokenizer.build_inputs_with_special_tokens(text)
encoded_pair = tokenizer.build_inputs_with_special_tokens(text, text_2)
assert encoded_sentence == [101] + text + [102]
assert encoded_pair == [101] + text + [102] + text_2 + [102]
def test_offsets_with_special_characters(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
sentence = f"A, naïve {tokenizer_r.mask_token} AllenNLP sentence."
tokens = tokenizer_r.encode_plus(
sentence,
return_attention_mask=False,
return_token_type_ids=False,
return_offsets_mapping=True,
add_special_tokens=True,
)
do_lower_case = tokenizer_r.do_lower_case if hasattr(tokenizer_r, "do_lower_case") else False
expected_results = (
[
((0, 0), tokenizer_r.cls_token),
((0, 1), "A"),
((1, 2), ","),
((3, 5), "na"),
((5, 6), "##ï"),
((6, 8), "##ve"),
((9, 15), tokenizer_r.mask_token),
((16, 21), "Allen"),
((21, 23), "##NL"),
((23, 24), "##P"),
((25, 33), "sentence"),
((33, 34), "."),
((0, 0), tokenizer_r.sep_token),
]
if not do_lower_case
else [
((0, 0), tokenizer_r.cls_token),
((0, 1), "a"),
((1, 2), ","),
((3, 8), "naive"),
((9, 15), tokenizer_r.mask_token),
((16, 21), "allen"),
((21, 23), "##nl"),
((23, 24), "##p"),
((25, 33), "sentence"),
((33, 34), "."),
((0, 0), tokenizer_r.sep_token),
]
)
self.assertEqual(
[e[1] for e in expected_results], tokenizer_r.convert_ids_to_tokens(tokens["input_ids"])
)
self.assertEqual([e[0] for e in expected_results], tokens["offset_mapping"])
def test_change_tokenize_chinese_chars(self):
        list_of_common_chinese_char = ["的", "人", "有"]
        text_with_chinese_char = "".join(list_of_common_chinese_char)
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
kwargs["tokenize_chinese_chars"] = True
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
ids_without_spe_char_p = tokenizer_p.encode(text_with_chinese_char, add_special_tokens=False)
ids_without_spe_char_r = tokenizer_r.encode(text_with_chinese_char, add_special_tokens=False)
tokens_without_spe_char_r = tokenizer_r.convert_ids_to_tokens(ids_without_spe_char_r)
tokens_without_spe_char_p = tokenizer_p.convert_ids_to_tokens(ids_without_spe_char_p)
# it is expected that each Chinese character is not preceded by "##"
                self.assertListEqual(tokens_without_spe_char_p, list_of_common_chinese_char)
                self.assertListEqual(tokens_without_spe_char_r, list_of_common_chinese_char)
kwargs["tokenize_chinese_chars"] = False
tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
ids_without_spe_char_r = tokenizer_r.encode(text_with_chinese_char, add_special_tokens=False)
ids_without_spe_char_p = tokenizer_p.encode(text_with_chinese_char, add_special_tokens=False)
tokens_without_spe_char_r = tokenizer_r.convert_ids_to_tokens(ids_without_spe_char_r)
tokens_without_spe_char_p = tokenizer_p.convert_ids_to_tokens(ids_without_spe_char_p)
# it is expected that only the first Chinese character is not preceded by "##".
expected_tokens = [
f"##{token}" if idx != 0 else token for idx, token in enumerate(list_of_commun_chinese_char)
]
self.assertListEqual(tokens_without_spe_char_p, expected_tokens)
self.assertListEqual(tokens_without_spe_char_r, expected_tokens)
    # RetriBertModel doesn't define `get_input_embeddings`, and its forward method doesn't take only the tokenizer's output as input
@require_torch
@slow
def test_torch_encode_plus_sent_to_model(self):
import torch
from transformers import MODEL_MAPPING, TOKENIZER_MAPPING
MODEL_TOKENIZER_MAPPING = merge_model_tokenizer_mappings(MODEL_MAPPING, TOKENIZER_MAPPING)
tokenizers = self.get_tokenizers(do_lower_case=False)
for tokenizer in tokenizers:
with self.subTest(f"{tokenizer.__class__.__name__}"):
if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
return
config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
config = config_class()
if config.is_encoder_decoder or config.pad_token_id is None:
return
model = model_class(config)
# The following test is different from the common's one
self.assertGreaterEqual(model.bert_query.get_input_embeddings().weight.shape[0], len(tokenizer))
# Build sequence
first_ten_tokens = list(tokenizer.get_vocab().keys())[:10]
sequence = " ".join(first_ten_tokens)
encoded_sequence = tokenizer.encode_plus(sequence, return_tensors="pt")
# Ensure that the BatchEncoding.to() method works.
encoded_sequence.to(model.device)
batch_encoded_sequence = tokenizer.batch_encode_plus([sequence, sequence], return_tensors="pt")
# This should not fail
with torch.no_grad(): # saves some time
# The following lines are different from the common's ones
model.embed_questions(**encoded_sequence)
model.embed_questions(**batch_encoded_sequence)
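
As a quick reference, the sequence-builder behaviour checked above is the standard BERT scheme; a sketch using the checkpoint from `test_sequence_builders`:

from transformers import RetriBertTokenizer

tokenizer = RetriBertTokenizer.from_pretrained("yjernite/retribert-base-uncased")
ids = tokenizer.encode("sequence builders")  # special tokens added by default
# single sequence: [CLS] + tokens + [SEP], i.e. ids == [101] + body + [102]
print(tokenizer.convert_ids_to_tokens(ids))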
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Testing suite for the PyTorch TrajectoryTransformer model. """
import inspect
import unittest
import numpy as np
from transformers import TrajectoryTransformerConfig, is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
import torch
from transformers import TrajectoryTransformerModel
from transformers.models.trajectory_transformer.modeling_trajectory_transformer import (
TRAJECTORY_TRANSFORMER_PRETRAINED_MODEL_ARCHIVE_LIST,
)
class TrajectoryTransformerModelTester:
def __init__(self, parent, batch_size=13, n_embd=128, action_dim=6, observation_dim=17, is_training=True):
self.parent = parent
self.batch_size = batch_size
self.n_embd = n_embd
self.action_dim = action_dim
self.observation_dim = observation_dim
self.is_training = is_training
self.seq_length = self.action_dim + self.observation_dim + 1
def prepare_config_and_inputs(self):
trajectories = torch.LongTensor([np.random.permutation(self.seq_length) for _ in range(self.batch_size)]).to(
torch_device
)
attention_mask = random_attention_mask((self.batch_size, self.seq_length)).to(torch_device)
targets = torch.LongTensor([np.random.permutation(self.seq_length) for _ in range(self.batch_size)]).to(
torch_device
)
config = self.get_config()
return config, trajectories, attention_mask, targets
def get_config(self):
return TrajectoryTransformerConfig(
batch_size=self.batch_size,
n_embd=self.n_embd,
action_dim=self.action_dim,
observation_dim=self.observation_dim,
)
def create_and_check_model(self, config, input_dict):
model = TrajectoryTransformerModel(config=config)
model.to(torch_device)
model.eval()
result = model(trajectories=input_dict["trajectories"], attention_mask=input_dict["attention_mask"])
result = model(
trajectories=input_dict["trajectories"],
output_hidden_states=True,
output_attentions=True,
use_cache=True,
return_dict=True,
)
self.parent.assertEqual(result.hidden_states[-1].shape, (self.batch_size, self.seq_length, self.n_embd))
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(config, trajectories, attention_mask, targets) = config_and_inputs
inputs_dict = {"trajectories": trajectories, "attention_mask": attention_mask, "targets": targets}
return config, inputs_dict
@require_torch
class TrajectoryTransformerModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (TrajectoryTransformerModel,) if is_torch_available() else ()
pipeline_model_mapping = {"feature-extraction": TrajectoryTransformerModel} if is_torch_available() else {}
    # Ignoring a failing test from GenerationTesterMixin, as the model does not use input_ids
test_generate_without_input_ids = False
    # Ignoring failing tests from ModelTesterMixin, as the model does not implement these features
test_pruning = False
test_resize_embeddings = False
test_head_masking = False
test_attention_outputs = False
test_hidden_states_output = False
test_inputs_embeds = False
test_model_common_attributes = False
test_torchscript = False
def setUp(self):
self.model_tester = TrajectoryTransformerModelTester(self)
self.config_tester = ConfigTester(self, config_class=TrajectoryTransformerConfig, n_embd=37)
def test_config(self):
self.config_tester.run_common_tests()
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_conditional_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
model = model_class(config)
signature = inspect.signature(model.forward)
# signature.parameters is an OrderedDict => so arg_names order is deterministic
arg_names = [*signature.parameters.keys()]
expected_arg_names = ["trajectories"]
self.assertListEqual(arg_names[:1], expected_arg_names)
    # Input is 'trajectories', not 'input_ids'
def test_model_main_input_name(self):
model_signature = inspect.signature(getattr(TrajectoryTransformerModel, "forward"))
# The main input is the name of the argument after `self`
observed_main_input_name = list(model_signature.parameters.keys())[1]
self.assertEqual(TrajectoryTransformerModel.main_input_name, observed_main_input_name)
def test_retain_grad_hidden_states_attentions(self):
config, input_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.output_hidden_states = True
config.output_attentions = self.has_attentions
model = TrajectoryTransformerModel(config)
model.to(torch_device)
outputs = model(
trajectories=input_dict["trajectories"],
attention_mask=input_dict["attention_mask"],
targets=input_dict["targets"],
output_hidden_states=True,
output_attentions=True,
use_cache=True,
return_dict=True,
)
output = outputs[0]
hidden_states = outputs.hidden_states[0]
hidden_states.retain_grad()
if self.has_attentions:
attentions = outputs.attentions[0]
attentions.retain_grad()
output.flatten()[0].backward(retain_graph=True)
self.assertIsNotNone(hidden_states.grad)
if self.has_attentions:
self.assertIsNotNone(attentions.grad)
def test_training(self):
if not self.model_tester.is_training:
return
config, input_dict = self.model_tester.prepare_config_and_inputs_for_common()
model = TrajectoryTransformerModel(config)
model.to(torch_device)
model.train()
loss = model(
trajectories=input_dict["trajectories"],
attention_mask=input_dict["attention_mask"],
targets=input_dict["targets"],
output_hidden_states=True,
output_attentions=True,
use_cache=True,
return_dict=True,
).loss
loss.backward()
def test_training_gradient_checkpointing(self):
if not self.model_tester.is_training:
return
config, input_dict = self.model_tester.prepare_config_and_inputs_for_common()
model = TrajectoryTransformerModel(config)
model.gradient_checkpointing_enable()
model.to(torch_device)
model.train()
loss = model(
trajectories=input_dict["trajectories"],
attention_mask=input_dict["attention_mask"],
targets=input_dict["targets"],
output_hidden_states=True,
output_attentions=True,
use_cache=False,
return_dict=True,
).loss
loss.backward()
def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
configs_no_init = _config_zero_init(config)
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
for name, param in model.named_parameters():
if param.requires_grad:
self.assertIn(
((param.data.mean() * 1e9).round() / 1e9).item(),
[0.0, 1.0],
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
@slow
def test_model_from_pretrained(self):
for model_name in TRAJECTORY_TRANSFORMER_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
model = TrajectoryTransformerModel.from_pretrained(model_name)
self.assertIsNotNone(model)
@require_torch
class TrajectoryTransformerModelIntegrationTest(unittest.TestCase):
@slow
def test_prediction(self):
batch_size = 1
config = TrajectoryTransformerConfig.from_pretrained("CarlCochet/trajectory-transformer-halfcheetah-medium-v2")
model = TrajectoryTransformerModel.from_pretrained(
"CarlCochet/trajectory-transformer-halfcheetah-medium-v2", config=config
)
model.to(torch_device)
model.eval()
seq_length = model.config.action_dim + model.config.observation_dim + 1
trajectories = torch.LongTensor(
[[3, 19, 20, 22, 9, 7, 23, 10, 18, 14, 13, 4, 17, 11, 5, 6, 15, 21, 2, 8, 1, 0, 12, 16]]
).to(torch_device)
outputs = model(
trajectories=trajectories,
output_hidden_states=True,
output_attentions=True,
use_cache=True,
return_dict=True,
)
output = outputs.logits
expected_shape = torch.Size((batch_size, seq_length, model.config.vocab_size + 1))
expected_slice = torch.tensor(
[[[-0.7193, -0.2532, -0.0898], [1.9429, 2.0434, 2.3975], [-3.3651, -2.8744, -2.4532]]]
).to(torch_device)
output_slice = output[:, :3, :3]
self.assertEqual(output.shape, expected_shape)
self.assertTrue(torch.allclose(output_slice, expected_slice, atol=1e-4))
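
For context, the integration test above reduces to this usage pattern (checkpoint and trajectory taken directly from the test):

import torch
from transformers import TrajectoryTransformerModel

model = TrajectoryTransformerModel.from_pretrained(
    "CarlCochet/trajectory-transformer-halfcheetah-medium-v2"
).eval()
# one trajectory of action_dim + observation_dim + 1 = 24 discretized tokens
trajectories = torch.LongTensor(
    [[3, 19, 20, 22, 9, 7, 23, 10, 18, 14, 13, 4, 17, 11, 5, 6, 15, 21, 2, 8, 1, 0, 12, 16]]
)
with torch.no_grad():
    logits = model(trajectories=trajectories).logits  # (1, 24, vocab_size + 1)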
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Testing suite for the PyTorch Van model. """
import inspect
import math
import unittest
from transformers import VanConfig
from transformers.testing_utils import require_scipy, require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property, is_scipy_available, is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
if is_scipy_available():
from scipy import stats
if is_torch_available():
import torch
from torch import nn
from transformers import VanForImageClassification, VanModel
from transformers.models.van.modeling_van import VAN_PRETRAINED_MODEL_ARCHIVE_LIST
if is_vision_available():
from PIL import Image
from transformers import AutoImageProcessor
class VanModelTester:
def __init__(
self,
parent,
batch_size=2,
image_size=224,
num_channels=3,
hidden_sizes=[16, 32, 64, 128],
depths=[1, 1, 1, 1],
is_training=True,
use_labels=True,
num_labels=3,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.image_size = image_size
self.num_channels = num_channels
self.hidden_sizes = hidden_sizes
self.depths = depths
self.is_training = is_training
self.use_labels = use_labels
self.num_labels = num_labels
self.type_sequence_label_size = num_labels
def prepare_config_and_inputs(self):
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
labels = None
if self.use_labels:
labels = ids_tensor([self.batch_size], self.num_labels)
config = self.get_config()
return config, pixel_values, labels
def get_config(self):
return VanConfig(
num_channels=self.num_channels,
hidden_sizes=self.hidden_sizes,
depths=self.depths,
num_labels=self.num_labels,
is_decoder=False,
)
def create_and_check_model(self, config, pixel_values, labels):
model = VanModel(config=config)
model.to(torch_device)
model.eval()
result = model(pixel_values)
# expected last hidden states: B, C, H // 32, W // 32
self.parent.assertEqual(
result.last_hidden_state.shape,
(self.batch_size, self.hidden_sizes[-1], self.image_size // 32, self.image_size // 32),
)
def create_and_check_for_image_classification(self, config, pixel_values, labels):
model = VanForImageClassification(config)
model.to(torch_device)
model.eval()
result = model(pixel_values, labels=labels)
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
config, pixel_values, labels = config_and_inputs
inputs_dict = {"pixel_values": pixel_values}
return config, inputs_dict
@require_torch
class VanModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
"""
Here we also overwrite some of the tests of test_modeling_common.py, as Van does not use input_ids, inputs_embeds,
attention_mask and seq_length.
"""
all_model_classes = (VanModel, VanForImageClassification) if is_torch_available() else ()
pipeline_model_mapping = (
{"feature-extraction": VanModel, "image-classification": VanForImageClassification}
if is_torch_available()
else {}
)
test_pruning = False
test_resize_embeddings = False
test_head_masking = False
has_attentions = False
def setUp(self):
self.model_tester = VanModelTester(self)
self.config_tester = ConfigTester(self, config_class=VanConfig, has_text_modality=False, hidden_size=37)
def test_config(self):
self.create_and_test_config_common_properties()
self.config_tester.create_and_test_config_to_json_string()
self.config_tester.create_and_test_config_to_json_file()
self.config_tester.create_and_test_config_from_and_save_pretrained()
self.config_tester.create_and_test_config_with_num_labels()
self.config_tester.check_config_can_be_init_without_params()
self.config_tester.check_config_arguments_init()
def create_and_test_config_common_properties(self):
return
@unittest.skip(reason="Van does not use inputs_embeds")
def test_inputs_embeds(self):
pass
@unittest.skip(reason="Van does not support input and output embeddings")
def test_model_common_attributes(self):
pass
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
model = model_class(config)
signature = inspect.signature(model.forward)
# signature.parameters is an OrderedDict => so arg_names order is deterministic
arg_names = [*signature.parameters.keys()]
expected_arg_names = ["pixel_values"]
self.assertListEqual(arg_names[:1], expected_arg_names)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
@require_scipy
def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
configs_no_init = _config_zero_init(config)
for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
for name, module in model.named_modules():
if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm, nn.LayerNorm)):
self.assertTrue(
torch.all(module.weight == 1),
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
self.assertTrue(
torch.all(module.bias == 0),
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
elif isinstance(module, nn.Conv2d):
fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
fan_out //= module.groups
std = math.sqrt(2.0 / fan_out)
                    # weights are expected to be ~N(0, std^2); dividing by std should give ~N(0, 1)
data = module.weight.data.cpu().flatten().numpy() / std
test = stats.anderson(data)
self.assertTrue(test.statistic > 0.05)
def test_hidden_states_output(self):
def check_hidden_states_output(inputs_dict, config, model_class):
model = model_class(config)
model.to(torch_device)
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
hidden_states = outputs.encoder_hidden_states if config.is_encoder_decoder else outputs.hidden_states
expected_num_stages = len(self.model_tester.hidden_sizes)
# van has no embeddings
self.assertEqual(len(hidden_states), expected_num_stages)
# Van's feature maps are of shape (batch_size, num_channels, height, width)
self.assertListEqual(
list(hidden_states[0].shape[-2:]),
[self.model_tester.image_size // 4, self.model_tester.image_size // 4],
)
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
inputs_dict["output_hidden_states"] = True
check_hidden_states_output(inputs_dict, config, model_class)
# check that output_hidden_states also work using config
del inputs_dict["output_hidden_states"]
config.output_hidden_states = True
check_hidden_states_output(inputs_dict, config, model_class)
def test_for_image_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
@slow
def test_model_from_pretrained(self):
for model_name in VAN_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
model = VanModel.from_pretrained(model_name)
self.assertIsNotNone(model)
# We will verify our results on an image of cute cats
def prepare_img():
image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
return image
@require_torch
@require_vision
class VanModelIntegrationTest(unittest.TestCase):
@cached_property
def default_image_processor(self):
return AutoImageProcessor.from_pretrained(VAN_PRETRAINED_MODEL_ARCHIVE_LIST[0])
@slow
def test_inference_image_classification_head(self):
model = VanForImageClassification.from_pretrained(VAN_PRETRAINED_MODEL_ARCHIVE_LIST[0]).to(torch_device)
image_processor = self.default_image_processor
image = prepare_img()
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
# forward pass
with torch.no_grad():
outputs = model(**inputs)
# verify the logits
expected_shape = torch.Size((1, 1000))
self.assertEqual(outputs.logits.shape, expected_shape)
expected_slice = torch.tensor([0.1029, -0.0904, -0.6365]).to(torch_device)
self.assertTrue(torch.allclose(outputs.logits[0, :3], expected_slice, atol=1e-4))
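
The integration test condenses to the usual image-classification recipe. A sketch mirroring it; the checkpoint name is an assumption standing in for VAN_PRETRAINED_MODEL_ARCHIVE_LIST[0], and the image path is the repo fixture used by prepare_img:

import torch
from PIL import Image
from transformers import AutoImageProcessor, VanForImageClassification

checkpoint = "Visual-Attention-Network/van-base"  # assumed first archive-list entry
image_processor = AutoImageProcessor.from_pretrained(checkpoint)
model = VanForImageClassification.from_pretrained(checkpoint).eval()

image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
inputs = image_processor(images=image, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits  # (1, 1000) ImageNet logits
print(model.config.id2label[logits.argmax(-1).item()])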
@@ -69,10 +69,6 @@ SPECIAL_CASES_TO_ALLOW = {
     "CvtConfig": ["layer_norm_eps"],
     # having default values other than `1e-5` - we can't fix them without breaking
     "PerceiverConfig": ["layer_norm_eps"],
-    # having default values other than `1e-5` - we can't fix them without breaking
-    "RetriBertConfig": ["layer_norm_eps"],
-    # having default values other than `1e-5` - we can't fix them without breaking
-    "TrajectoryTransformerConfig": ["layer_norm_eps"],
     # used internally to calculate the feature size
     "InformerConfig": ["num_static_real_features", "num_time_features"],
     # used internally to calculate the feature size
@@ -106,7 +102,6 @@ SPECIAL_CASES_TO_ALLOW.update(
         "OneFormerConfig": True,
         "PerceiverConfig": True,
         "RagConfig": True,
-        "RetriBertConfig": True,
         "SpeechT5Config": True,
         "SwinConfig": True,
         "Swin2SRConfig": True,
@@ -114,11 +109,9 @@ SPECIAL_CASES_TO_ALLOW.update(
         "SwitchTransformersConfig": True,
         "TableTransformerConfig": True,
         "TapasConfig": True,
-        "TrajectoryTransformerConfig": True,
         "TransfoXLConfig": True,
         "UniSpeechConfig": True,
         "UniSpeechSatConfig": True,
-        "VanConfig": True,
         "WavLMConfig": True,
         "WhisperConfig": True,
         # TODO: @Arthur (for `alignment_head` and `alignment_layer`)
@@ -267,6 +260,9 @@ def check_config_attributes():
     """Check the arguments in `__init__` of all configuration classes are used in python files"""
     configs_with_unused_attributes = {}
     for _config_class in list(CONFIG_MAPPING.values()):
+        # Skip deprecated models
+        if "models.deprecated" in _config_class.__module__:
+            continue
         # Some config classes are not in `CONFIG_MAPPING` (e.g. `CLIPVisionConfig`, `Blip2VisionConfig`, etc.)
         config_classes_in_module = [
             cls
@@ -74,6 +74,9 @@ def check_config_docstrings_have_checkpoints():
     configs_without_checkpoint = []
     for config_class in list(CONFIG_MAPPING.values()):
+        # Skip deprecated models
+        if "models.deprecated" in config_class.__module__:
+            continue
         checkpoint = get_checkpoint_from_config_class(config_class)
         name = config_class.__name__
@@ -400,6 +400,8 @@ def check_model_list():
     models_dir = os.path.join(PATH_TO_TRANSFORMERS, "models")
     _models = []
     for model in os.listdir(models_dir):
+        if model == "deprecated":
+            continue
         model_dir = os.path.join(models_dir, model)
         if os.path.isdir(model_dir) and "__init__.py" in os.listdir(model_dir):
             _models.append(model)
@@ -445,6 +447,8 @@ def get_model_modules():
     ]
     modules = []
     for model in dir(transformers.models):
+        if model == "deprecated":
+            continue
         # There are some magic dunder attributes in the dir, we ignore them
         if not model.startswith("__"):
             model_module = getattr(transformers.models, model)
@@ -767,6 +771,8 @@ def check_objects_being_equally_in_main_init():
         obj = getattr(transformers, attr)
         if hasattr(obj, "__module__"):
             module_path = obj.__module__
+            if "models.deprecated" in module_path:
+                continue
             module_name = module_path.split(".")[-1]
             module_dir = ".".join(module_path.split(".")[:-1])
             if (
@@ -277,9 +277,6 @@ src/transformers/models/mbart/tokenization_mbart.py
 src/transformers/models/mbart/tokenization_mbart_fast.py
 src/transformers/models/mbart50/tokenization_mbart50.py
 src/transformers/models/mbart50/tokenization_mbart50_fast.py
-src/transformers/models/mctct/configuration_mctct.py
-src/transformers/models/mctct/feature_extraction_mctct.py
-src/transformers/models/mctct/processing_mctct.py
 src/transformers/models/megatron_bert/configuration_megatron_bert.py
 src/transformers/models/mgp_str/processing_mgp_str.py
 src/transformers/models/mgp_str/tokenization_mgp_str.py
@@ -362,8 +359,6 @@ src/transformers/models/rembert/tokenization_rembert_fast.py
 src/transformers/models/resnet/configuration_resnet.py
 src/transformers/models/resnet/modeling_resnet.py
 src/transformers/models/resnet/modeling_tf_resnet.py
-src/transformers/models/retribert/tokenization_retribert.py
-src/transformers/models/retribert/tokenization_retribert_fast.py
 src/transformers/models/roberta/configuration_roberta.py
 src/transformers/models/roberta/modeling_roberta.py
 src/transformers/models/roberta/modeling_tf_roberta.py
@@ -413,12 +408,10 @@ src/transformers/models/t5/tokenization_t5.py
 src/transformers/models/t5/tokenization_t5_fast.py
 src/transformers/models/table_transformer/modeling_table_transformer.py
 src/transformers/models/tapas/tokenization_tapas.py
-src/transformers/models/tapex/tokenization_tapex.py
 src/transformers/models/time_series_transformer/configuration_time_series_transformer.py
 src/transformers/models/time_series_transformer/modeling_time_series_transformer.py
 src/transformers/models/timesformer/configuration_timesformer.py
 src/transformers/models/timesformer/modeling_timesformer.py
-src/transformers/models/trajectory_transformer/configuration_trajectory_transformer.py
 src/transformers/models/transfo_xl/configuration_transfo_xl.py
 src/transformers/models/transfo_xl/tokenization_transfo_xl.py
 src/transformers/models/trocr/configuration_trocr.py
@@ -431,7 +424,6 @@ src/transformers/models/unispeech/configuration_unispeech.py
 src/transformers/models/unispeech/modeling_unispeech.py
 src/transformers/models/unispeech_sat/modeling_unispeech_sat.py
 src/transformers/models/upernet/modeling_upernet.py
-src/transformers/models/van/modeling_van.py
 src/transformers/models/videomae/feature_extraction_videomae.py
 src/transformers/models/videomae/image_processing_videomae.py
 src/transformers/models/videomae/modeling_videomae.py