Unverified commit a14b055b, authored by Albert Villanova del Moral, committed by GitHub

Pass datasets trust_remote_code (#31406)

* Pass datasets trust_remote_code

* Pass trust_remote_code in more tests

* Add trust_remote_dataset_code arg to some tests

* Revert "Temporarily pin datasets upper version to fix CI"

This reverts commit b7672826.

* Pass trust_remote_code in librispeech_asr_dummy docstrings

* Revert "Pin datasets<2.20.0 for examples"

This reverts commit 833fc17a.

* Pass trust_remote_code to all examples

* Revert "Add trust_remote_dataset_code arg to some tests" to research_projects

* Pass trust_remote_code to tests

* Pass trust_remote_code to docstrings

* Fix flax examples tests requirements

* Pass trust_remote_dataset_code arg to tests

* Replace trust_remote_dataset_code with trust_remote_code in one example

* Fix duplicate trust_remote_code

* Replace args.trust_remote_dataset_code with args.trust_remote_code

* Replace trust_remote_dataset_code with trust_remote_code in parser

* Replace trust_remote_dataset_code with trust_remote_code in dataclasses

* Replace trust_remote_dataset_code with trust_remote_code arg
parent 485fd814
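
The change below is mechanical but repeated across docstrings, example scripts, and tests: every `load_dataset` call that targets a script-backed dataset gains an explicit `trust_remote_code=True`, because datasets 2.20.0 (the release the reverted pins were guarding against) stops executing Hub-hosted loading scripts unless the caller opts in. A minimal sketch of the pattern, using one of the fixture repositories that appears throughout the diff:

```python
from datasets import load_dataset

# This repository ships its own loading script, so executing Hub code
# requires an explicit opt-in; without the flag, recent datasets
# releases refuse to run the script instead of running it silently.
ds = load_dataset(
    "hf-internal-testing/librispeech_asr_dummy",
    "clean",
    split="validation",
    trust_remote_code=True,
)
print(ds[0]["audio"]["sampling_rate"])
```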
@@ -1590,7 +1590,7 @@ class WhisperModel(WhisperPreTrainedModel):
 >>> model = WhisperModel.from_pretrained("openai/whisper-base")
 >>> feature_extractor = AutoFeatureExtractor.from_pretrained("openai/whisper-base")
->>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True)
 >>> inputs = feature_extractor(ds[0]["audio"]["array"], return_tensors="pt")
 >>> input_features = inputs.input_features
 >>> decoder_input_ids = torch.tensor([[1, 1]]) * model.config.decoder_start_token_id
@@ -1731,7 +1731,7 @@ class WhisperForConditionalGeneration(WhisperGenerationMixin, WhisperPreTrainedM
 >>> processor = AutoProcessor.from_pretrained("openai/whisper-tiny.en")
 >>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
->>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True)
 >>> inputs = processor(ds[0]["audio"]["array"], return_tensors="pt")
 >>> input_features = inputs.input_features
@@ -1983,7 +1983,7 @@ class WhisperForCausalLM(WhisperPreTrainedModel):
 >>> assistant_model = WhisperForCausalLM.from_pretrained("distil-whisper/distil-large-v2")
->>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True)
 >>> sample = ds[0]["audio"]
 >>> input_features = processor(
 ...     sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt"
...
@@ -385,7 +385,7 @@ PT_SPEECH_BASE_MODEL_SAMPLE = r"""
 >>> import torch
 >>> from datasets import load_dataset
->>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
 >>> dataset = dataset.sort("id")
 >>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -411,7 +411,7 @@ PT_SPEECH_CTC_SAMPLE = r"""
 >>> from datasets import load_dataset
 >>> import torch
->>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
 >>> dataset = dataset.sort("id")
 >>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -446,7 +446,7 @@ PT_SPEECH_SEQ_CLASS_SAMPLE = r"""
 >>> from datasets import load_dataset
 >>> import torch
->>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
 >>> dataset = dataset.sort("id")
 >>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -482,7 +482,7 @@ PT_SPEECH_FRAME_CLASS_SAMPLE = r"""
 >>> from datasets import load_dataset
 >>> import torch
->>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
 >>> dataset = dataset.sort("id")
 >>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -511,7 +511,7 @@ PT_SPEECH_XVECTOR_SAMPLE = r"""
 >>> from datasets import load_dataset
 >>> import torch
->>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
 >>> dataset = dataset.sort("id")
 >>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -546,7 +546,7 @@ PT_VISION_BASE_MODEL_SAMPLE = r"""
 >>> import torch
 >>> from datasets import load_dataset
->>> dataset = load_dataset("huggingface/cats-image")
+>>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
 >>> image = dataset["test"]["image"][0]
 >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
@@ -571,7 +571,7 @@ PT_VISION_SEQ_CLASS_SAMPLE = r"""
 >>> import torch
 >>> from datasets import load_dataset
->>> dataset = load_dataset("huggingface/cats-image")
+>>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
 >>> image = dataset["test"]["image"][0]
 >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
@@ -803,7 +803,7 @@ TF_SPEECH_BASE_MODEL_SAMPLE = r"""
 >>> from transformers import AutoProcessor, {model_class}
 >>> from datasets import load_dataset
->>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
 >>> dataset = dataset.sort("id")
 >>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -828,7 +828,7 @@ TF_SPEECH_CTC_SAMPLE = r"""
 >>> from datasets import load_dataset
 >>> import tensorflow as tf
->>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
 >>> dataset = dataset.sort("id")
 >>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -863,7 +863,7 @@ TF_VISION_BASE_MODEL_SAMPLE = r"""
 >>> from transformers import AutoImageProcessor, {model_class}
 >>> from datasets import load_dataset
->>> dataset = load_dataset("huggingface/cats-image")
+>>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
 >>> image = dataset["test"]["image"][0]
 >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
@@ -886,7 +886,7 @@ TF_VISION_SEQ_CLASS_SAMPLE = r"""
 >>> import tensorflow as tf
 >>> from datasets import load_dataset
->>> dataset = load_dataset("huggingface/cats-image")
+>>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
 >>> image = dataset["test"]["image"][0]
 >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
...
@@ -128,9 +128,9 @@ class ModelArguments:
         default=False,
         metadata={
             "help": (
-                "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
-                "execute code present on the Hub on your local machine."
+                "Whether to trust the execution of code from datasets/models defined on the Hub."
+                " This option should only be set to `True` for repositories you trust and in which you have read the"
+                " code, as it will execute code present on the Hub on your local machine."
             )
         },
     )
@@ -274,7 +274,11 @@ def main():
     # download the dataset.
     if data_args.dataset_name is not None:
         # Downloading and loading a dataset from the hub.
-        raw_datasets = load_dataset(data_args.dataset_name, data_args.dataset_config_name)
+        raw_datasets = load_dataset(
+            data_args.dataset_name,
+            data_args.dataset_config_name,
+            trust_remote_code=model_args.trust_remote_code,
+        )
     else:
         data_files = {}
         if data_args.train_file is not None:
@@ -568,6 +572,15 @@ def parse_args():
         default=None,
         help="The configuration name of the dataset to use (via the datasets library).",
     )
+    parser.add_argument(
+        "--trust_remote_code",
+        action="store_true",
+        help=(
+            "Whether to trust the execution of code from datasets/models defined on the Hub."
+            " This option should only be set to `True` for repositories you trust and in which you have read the"
+            " code, as it will execute code present on the Hub on your local machine."
+        ),
+    )
     parser.add_argument(
         "--train_file", type=str, default=None, help="A csv or a json file containing the training data."
     )
@@ -725,7 +738,9 @@ def main():
     # download the dataset.
     if args.dataset_name is not None:
         # Downloading and loading a dataset from the hub.
-        raw_datasets = load_dataset(args.dataset_name, args.dataset_config_name)
+        raw_datasets = load_dataset(
+            args.dataset_name, args.dataset_config_name, trust_remote_code=args.trust_remote_code
+        )
     else:
         data_files = {}
         if args.train_file is not None:
...
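
In the example scripts the flag has to travel from the command line into `load_dataset`, through the `ModelArguments` dataclass in the Trainer variants and through `argparse` in the `no_trainer` variants, as the hunks above show. A condensed, self-contained sketch of the argparse wiring (an illustration of the shape, not the actual example script):

```python
import argparse

from datasets import load_dataset

parser = argparse.ArgumentParser()
parser.add_argument("--dataset_name", type=str, required=True)
parser.add_argument("--dataset_config_name", type=str, default=None)
parser.add_argument(
    "--trust_remote_code",
    action="store_true",
    help=(
        "Whether to trust the execution of code from datasets/models defined on the Hub."
        " Only set this for repositories you trust and in which you have read the code."
    ),
)
args = parser.parse_args()

# store_true defaults to False, so users must opt in explicitly on the CLI.
raw_datasets = load_dataset(
    args.dataset_name, args.dataset_config_name, trust_remote_code=args.trust_remote_code
)
```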
@@ -269,6 +269,7 @@ def make_task_cmds():
         "img_clas": f"""
             {scripts_dir}/image-classification/run_image_classification.py
             --dataset_name hf-internal-testing/cats_vs_dogs_sample
+            --trust_remote_code
             --remove_unused_columns False
             --max_steps 10
             --image_processor_name {DS_TESTS_DIRECTORY}/vit_feature_extractor.json
...
@@ -153,7 +153,9 @@ class ASTFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.Test
     def _load_datasamples(self, num_samples):
         from datasets import load_dataset

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
...
@@ -96,7 +96,7 @@ class BeitImageProcessingTester(unittest.TestCase):
 def prepare_semantic_single_inputs():
-    dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+    dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
     image = Image.open(dataset[0]["file"])
     map = Image.open(dataset[1]["file"])
@@ -105,7 +105,7 @@ def prepare_semantic_single_inputs():
 def prepare_semantic_batch_inputs():
-    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
     image1 = Image.open(ds[0]["file"])
     map1 = Image.open(ds[1]["file"])
...
@@ -484,7 +484,7 @@ class BeitModelIntegrationTest(unittest.TestCase):
         image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False)
-        ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+        ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
         image = Image.open(ds[0]["file"])
         inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
@@ -527,7 +527,7 @@ class BeitModelIntegrationTest(unittest.TestCase):
         image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False)
-        ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+        ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
         image = Image.open(ds[0]["file"])
         inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
...
@@ -123,7 +123,7 @@ class BloomTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         - https://huggingface.co/bigscience/tokenizer/
         """
         tokenizer = self.get_rust_tokenizer()
-        ds = load_dataset("xnli", "all_languages", split="test", streaming=True)
+        ds = load_dataset("facebook/xnli", "all_languages", split="test", streaming=True)
         sample_data = next(iter(ds))["premise"]  # pick up one data
         input_text = list(sample_data.values())
...
@@ -164,7 +164,9 @@ class ClapFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.Tes
     # Copied from tests.models.whisper.test_feature_extraction_whisper.WhisperFeatureExtractionTest._load_datasamples
     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
...
@@ -665,7 +665,9 @@ class ClapModelIntegrationTest(unittest.TestCase):
             "repeat": 0.0023,
         }
-        librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        librispeech_dummy = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         audio_sample = librispeech_dummy[-1]
         model_id = "laion/clap-htsat-unfused"
@@ -692,7 +694,9 @@ class ClapModelIntegrationTest(unittest.TestCase):
             "pad": -0.000379,
         }
-        librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        librispeech_dummy = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         audio_sample = librispeech_dummy[-1]
         model_id = "laion/clap-htsat-fused"
@@ -719,7 +723,9 @@ class ClapModelIntegrationTest(unittest.TestCase):
             "pad": 0.0006,
         }
-        librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        librispeech_dummy = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         audio_samples = [sample["array"] for sample in librispeech_dummy[0:4]["audio"]]
         model_id = "laion/clap-htsat-fused"
@@ -746,7 +752,9 @@ class ClapModelIntegrationTest(unittest.TestCase):
             "pad": 0.0019,
         }
-        librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        librispeech_dummy = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         audio_samples = [sample["array"] for sample in librispeech_dummy[0:4]["audio"]]
         model_id = "laion/clap-htsat-unfused"
...
@@ -209,7 +209,9 @@ class ClvpFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.Tes
         self.assertTrue(pt_processed.input_features.dtype == torch.float32)

     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         ds = ds.cast_column("audio", Audio(sampling_rate=22050))
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
...
@@ -371,7 +371,9 @@ class ClvpModelForConditionalGenerationTester:
     def prepare_config_and_inputs(self):
         _, input_ids, attention_mask = self.clvp_encoder_tester.prepare_config_and_inputs()
-        ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = datasets.load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=22050))
         _, audio, sr = ds.sort("id").select(range(1))[:1]["audio"][0].values()
@@ -553,7 +555,9 @@ class ClvpModelForConditionalGenerationTest(ModelTesterMixin, unittest.TestCase)
 class ClvpIntegrationTest(unittest.TestCase):
     def setUp(self):
         self.text = "This is an example text."
-        ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = datasets.load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=22050))
         _, self.speech_samples, self.sr = ds.sort("id").select(range(1))[:1]["audio"][0].values()
...
@@ -493,7 +493,7 @@ class LlamaIntegrationTest(unittest.TestCase):
         pyth_tokenizer = self.tokenizer
         rust_tokenizer = self.rust_tokenizer
-        dataset = load_dataset("code_x_glue_ct_code_to_text", "go")
+        dataset = load_dataset("google/code_x_glue_ct_code_to_text", "go")
         for item in tqdm.tqdm(dataset["validation"]):
             string = item["code"]
             encoded1 = pyth_tokenizer.encode(string)
@@ -506,7 +506,7 @@ class LlamaIntegrationTest(unittest.TestCase):
             self.assertEqual(decoded1, decoded2)
-        dataset = load_dataset("xnli", "all_languages")
+        dataset = load_dataset("facebook/xnli", "all_languages")
         for item in tqdm.tqdm(dataset["train"]):
             for string in item["premise"].values():
...
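
The tokenizer integration tests above take a different route: rather than adding `trust_remote_code=True`, they move from the deprecated bare dataset names (`code_x_glue_ct_code_to_text`, `xnli`) to the org-namespaced repositories, and the updated calls add no extra argument, suggesting those repositories serve their data without a loading script. A sketch contrasting the two routes:

```python
from datasets import load_dataset

# Route 1: org-namespaced repo whose calls here stay flag-free, so no
# remote-code opt-in appears to be needed.
xnli = load_dataset("facebook/xnli", "all_languages", split="test", streaming=True)

# Route 2: fixture repo with its own loading script; explicit opt-in required.
dummy = load_dataset(
    "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
```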
@@ -697,7 +697,9 @@ class Data2VecAudioUtilsTest(unittest.TestCase):
 @slow
 class Data2VecAudioModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
@@ -706,7 +708,7 @@ class Data2VecAudioModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test")
+        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
         return ds[:num_samples]
...
@@ -138,7 +138,9 @@ class EnCodecFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.
     def _load_datasamples(self, num_samples):
         from datasets import load_dataset

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         audio_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
...
@@ -462,7 +462,9 @@ class EncodecIntegrationTest(unittest.TestCase):
             "1.5": [371955],
             "24.0": [6659962],
         }
-        librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        librispeech_dummy = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         model_id = "facebook/encodec_24khz"
         model = EncodecModel.from_pretrained(model_id).to(torch_device)
@@ -516,7 +518,9 @@ class EncodecIntegrationTest(unittest.TestCase):
             "3.0": [144259, 146765, 156435, 176871, 161971],
             "24.0": [1568553, 1294948, 1306190, 1464747, 1663150],
         }
-        librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        librispeech_dummy = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         model_id = "facebook/encodec_48khz"
         model = EncodecModel.from_pretrained(model_id).to(torch_device)
@@ -578,7 +582,9 @@ class EncodecIntegrationTest(unittest.TestCase):
                 [85561, 81870, 76953, 48967, 79315, 85442, 81479, 107241],
             ],
         }
-        librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        librispeech_dummy = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         model_id = "facebook/encodec_48khz"
         model = EncodecModel.from_pretrained(model_id).to(torch_device)
...
@@ -314,7 +314,7 @@ class GemmaIntegrationTest(unittest.TestCase):
         pyth_tokenizer = self.tokenizer
         rust_tokenizer = self.rust_tokenizer
-        dataset = load_dataset("code_x_glue_ct_code_to_text", "go")
+        dataset = load_dataset("google/code_x_glue_ct_code_to_text", "go")
         for item in tqdm.tqdm(dataset["validation"]):
             string = item["code"]
             encoded1 = pyth_tokenizer.encode(string)
@@ -333,7 +333,7 @@ class GemmaIntegrationTest(unittest.TestCase):
             self.assertEqual(decoded1, decoded2)
-        dataset = load_dataset("xnli", "all_languages")
+        dataset = load_dataset("facebook/xnli", "all_languages")
         for item in tqdm.tqdm(dataset["train"]):
             for string in item["premise"].values():
...
@@ -757,7 +757,9 @@ class HubertModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
         from datasets import load_dataset

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
@@ -768,7 +770,7 @@ class HubertModelIntegrationTest(unittest.TestCase):
     def _load_superb(self, task, num_samples):
         from datasets import load_dataset

-        ds = load_dataset("anton-l/superb_dummy", task, split="test")
+        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
         return ds[:num_samples]
...
@@ -609,7 +609,9 @@ class TFHubertModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
         from datasets import load_dataset

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
...
@@ -103,7 +103,7 @@ class LayoutLMv2ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
         from datasets import load_dataset

-        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
         image = Image.open(ds[0]["file"]).convert("RGB")
...