Unverified Commit a14b055b authored by Albert Villanova del Moral, committed by GitHub

Pass datasets trust_remote_code (#31406)

* Pass datasets trust_remote_code

* Pass trust_remote_code in more tests

* Add trust_remote_dataset_code arg to some tests

* Revert "Temporarily pin datasets upper version to fix CI"

This reverts commit b7672826.

* Pass trust_remote_code in librispeech_asr_dummy docstrings

* Revert "Pin datasets<2.20.0 for examples"

This reverts commit 833fc17a.

* Pass trust_remote_code to all examples

* Revert "Add trust_remote_dataset_code arg to some tests" to research_projects

* Pass trust_remote_code to tests

* Pass trust_remote_code to docstrings

* Fix flax examples tests requirements

* Pass trust_remote_dataset_code arg to tests

* Replace trust_remote_dataset_code with trust_remote_code in one example

* Fix duplicate trust_remote_code

* Replace args.trust_remote_dataset_code with args.trust_remote_code

* Replace trust_remote_dataset_code with trust_remote_code in parser

* Replace trust_remote_dataset_code with trust_remote_code in dataclasses

* Replace trust_remote_dataset_code with trust_remote_code arg
parent 485fd814
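
The change is mechanical but broad: with the datasets 2.20 release the trust_remote_code default flipped to False, so every load_dataset call that targets a script-based Hub dataset now has to opt in explicitly, and the example scripts expose a single trust_remote_code flag through their argument dataclasses instead of the interim trust_remote_dataset_code. A minimal sketch of both halves of the pattern (the DataTrainingArguments name and help text below are illustrative, written in the style of the example scripts, not copied from any one of them):

from dataclasses import dataclass, field

from datasets import load_dataset


@dataclass
class DataTrainingArguments:
    # Parsed with HfArgumentParser in the example scripts; only the relevant field is shown here.
    trust_remote_code: bool = field(
        default=False,
        metadata={
            "help": (
                "Whether to trust datasets defined on the Hub by a loading script. Set to True only for "
                "repositories you trust, since the script is executed on your local machine."
            )
        },
    )


# Tests and docstrings pass the flag directly at the call site:
ds = load_dataset(
    "hf-internal-testing/librispeech_asr_dummy",
    "clean",
    split="validation",
    trust_remote_code=True,
)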
@@ -548,7 +548,9 @@ class UniSpeechRobustModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.T
 @slow
 class UniSpeechModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
@@ -557,7 +559,7 @@ class UniSpeechModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test")
+        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
         return ds[:num_samples]
@@ -812,7 +812,9 @@ class UniSpeechSatRobustModelTest(ModelTesterMixin, unittest.TestCase):
 @slow
 class UniSpeechSatModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
@@ -821,7 +823,7 @@ class UniSpeechSatModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test")
+        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
         return ds[:num_samples]
@@ -327,7 +327,9 @@ class UnivNetFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.
         self.assertTrue(pt_processed.input_features.dtype == torch.float32)

     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         ds = ds.cast_column("audio", Audio(sampling_rate=self.feat_extract_tester.sampling_rate))
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
@@ -220,7 +220,9 @@ class UnivNetModelIntegrationTests(unittest.TestCase):
         torch.cuda.empty_cache()

     def _load_datasamples(self, num_samples, sampling_rate=24000):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         ds = ds.cast_column("audio", Audio(sampling_rate=sampling_rate))
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
@@ -637,7 +637,7 @@ class ViltModelIntegrationTest(unittest.TestCase):
         processor = self.default_processor

-        dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="test")
+        dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="test", trust_remote_code=True)

         image1 = Image.open(dataset[0]["file"]).convert("RGB")
         image2 = Image.open(dataset[1]["file"]).convert("RGB")
@@ -815,7 +815,7 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
     def test_inference_handwritten(self):
         model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten").to(torch_device)

-        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test")
+        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test", trust_remote_code=True)
         image = Image.open(dataset[0]["file"]).convert("RGB")

         processor = self.default_processor
@@ -840,7 +840,7 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
     def test_inference_printed(self):
         model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed").to(torch_device)

-        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test")
+        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test", trust_remote_code=True)
         image = Image.open(dataset[1]["file"]).convert("RGB")

         processor = self.default_processor
@@ -72,7 +72,7 @@ def _test_wav2vec2_with_lm_invalid_pool(in_queue, out_queue, timeout):
     try:
         _ = in_queue.get(timeout=timeout)

-        ds = load_dataset("common_voice", "es", split="test", streaming=True)
+        ds = load_dataset("legacy-datasets/common_voice", "es", split="test", streaming=True, trust_remote_code=True)
         sample = next(iter(ds))
         resampled_audio = librosa.resample(sample["audio"]["array"], 48_000, 16_000)
@@ -489,7 +489,9 @@ class FlaxWav2Vec2UtilsTest(unittest.TestCase):
 @slow
 class FlaxWav2Vec2ModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
@@ -585,7 +587,7 @@ class FlaxWav2Vec2ModelIntegrationTest(unittest.TestCase):
     @require_pyctcdecode
     @require_librosa
     def test_wav2vec2_with_lm(self):
-        ds = load_dataset("common_voice", "es", split="test", streaming=True)
+        ds = load_dataset("legacy-datasets/common_voice", "es", split="test", streaming=True, trust_remote_code=True)
         sample = next(iter(ds))
         resampled_audio = librosa.resample(sample["audio"]["array"], 48_000, 16_000)
@@ -604,7 +606,7 @@ class FlaxWav2Vec2ModelIntegrationTest(unittest.TestCase):
     @require_pyctcdecode
     @require_librosa
     def test_wav2vec2_with_lm_pool(self):
-        ds = load_dataset("common_voice", "es", split="test", streaming=True)
+        ds = load_dataset("legacy-datasets/common_voice", "es", split="test", streaming=True, trust_remote_code=True)
         sample = next(iter(ds))
         resampled_audio = librosa.resample(sample["audio"]["array"], 48_000, 16_000)
@@ -716,7 +716,9 @@ class TFWav2Vec2ModelIntegrationTest(unittest.TestCase):
         gc.collect()

     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
@@ -725,7 +727,7 @@ class TFWav2Vec2ModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test")
+        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
         return ds[:num_samples]
@@ -101,7 +101,9 @@ def _test_wav2vec2_with_lm_invalid_pool(in_queue, out_queue, timeout):
     try:
         _ = in_queue.get(timeout=timeout)

-        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
+        ds = load_dataset(
+            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
+        )
         sample = next(iter(ds))
         resampled_audio = torchaudio.functional.resample(
@@ -1468,7 +1470,9 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         backend_empty_cache(torch_device)

     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
@@ -1477,7 +1481,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test")
+        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
         return ds[:num_samples]
@@ -1843,7 +1847,9 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
     @require_pyctcdecode
     @require_torchaudio
     def test_wav2vec2_with_lm(self):
-        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
+        ds = load_dataset(
+            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
+        )
         sample = next(iter(ds))
         resampled_audio = torchaudio.functional.resample(
@@ -1867,7 +1873,9 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
     @require_pyctcdecode
     @require_torchaudio
     def test_wav2vec2_with_lm_pool(self):
-        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
+        ds = load_dataset(
+            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
+        )
         sample = next(iter(ds))
         resampled_audio = torchaudio.functional.resample(
@@ -1965,7 +1973,9 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         LANG_MAP = {"it": "ita", "es": "spa", "fr": "fra", "en": "eng"}

         def run_model(lang):
-            ds = load_dataset("mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True)
+            ds = load_dataset(
+                "mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True, trust_remote_code=True
+            )
             sample = next(iter(ds))

             wav2vec2_lang = LANG_MAP[lang]
@@ -855,7 +855,9 @@ class Wav2Vec2BertUtilsTest(unittest.TestCase):
 @slow
 class Wav2Vec2BertModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)])
         speech_samples = speech_samples[:num_samples]["audio"]
@@ -866,7 +866,9 @@ class Wav2Vec2ConformerUtilsTest(unittest.TestCase):
 @slow
 class Wav2Vec2ConformerModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)])
         speech_samples = speech_samples[:num_samples]["audio"]
@@ -463,7 +463,9 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
     def test_word_time_stamp_integration(self):
         import torch

-        ds = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
+        ds = load_dataset(
+            "mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True
+        )
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
         ds_iter = iter(ds)
         sample = next(ds_iter)
@@ -494,7 +494,9 @@ class WavLMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
 @slow
 class WavLMModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
@@ -503,7 +505,7 @@ class WavLMModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test")
+        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
         return ds[:num_samples]
@@ -215,7 +215,9 @@ class WhisperFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.
         self.assertTrue(pt_processed.input_features.dtype == torch.float32)

     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
@@ -410,7 +410,9 @@ class FlaxWhisperModelIntegrationTest(unittest.TestCase):
         return WhisperProcessor.from_pretrained("openai/whisper-base")

     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
@@ -561,7 +563,7 @@ class FlaxWhisperModelIntegrationTest(unittest.TestCase):
         processor = WhisperProcessor.from_pretrained("openai/whisper-large")
         model = FlaxWhisperForConditionalGeneration.from_pretrained("openai/whisper-large", from_pt=True)

-        ds = load_dataset("common_voice", "ja", split="test", streaming=True)
+        ds = load_dataset("legacy-datasets/common_voice", "ja", split="test", streaming=True, trust_remote_code=True)
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
         input_speech = next(iter(ds))["audio"]["array"]
         input_features = processor.feature_extractor(raw_speech=input_speech, return_tensors="np")
@@ -704,7 +704,7 @@ class TFWhisperModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestC
 def _load_datasamples(num_samples):
-    ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+    ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True)
     # automatic decoding with librispeech
     speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
@@ -795,7 +795,7 @@ def _test_large_generation_multilingual(in_queue, out_queue, timeout):
         processor = WhisperProcessor.from_pretrained("openai/whisper-large")
         model = TFWhisperForConditionalGeneration.from_pretrained("openai/whisper-large")

-        ds = load_dataset("common_voice", "ja", split="test", streaming=True)
+        ds = load_dataset("legacy-datasets/common_voice", "ja", split="test", streaming=True, trust_remote_code=True)
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
         input_speech = next(iter(ds))["audio"]["array"]
         input_features = processor.feature_extractor(raw_speech=input_speech, return_tensors="tf").input_features
@@ -1552,7 +1552,9 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         return WhisperProcessor.from_pretrained("openai/whisper-base")

     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
@@ -1763,7 +1765,9 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
         model.to(torch_device)

-        ds = load_dataset("facebook/multilingual_librispeech", "german", split="test", streaming=True)
+        ds = load_dataset(
+            "facebook/multilingual_librispeech", "german", split="test", streaming=True, trust_remote_code=True
+        )
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
         input_speech = next(iter(ds))["audio"]["array"]
@@ -1830,7 +1834,14 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model.to(torch_device)

         token = os.getenv("HF_HUB_READ_TOKEN", True)
-        ds = load_dataset("mozilla-foundation/common_voice_6_1", "ja", split="test", streaming=True, token=token)
+        ds = load_dataset(
+            "mozilla-foundation/common_voice_6_1",
+            "ja",
+            split="test",
+            streaming=True,
+            token=token,
+            trust_remote_code=True,
+        )
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
         input_speech = next(iter(ds))["audio"]["array"]
@@ -2358,7 +2369,9 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         )
         assistant_model.to(torch_device)

-        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        dataset = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         sample = dataset[0]["audio"]

         input_features = processor(sample["array"], return_tensors="pt", sampling_rate=16_000).input_features
@@ -2407,7 +2420,9 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         )
         assistant_model.to(torch_device)

-        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        dataset = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         sample = dataset[0]["audio"]

         input_features = processor(sample["array"], return_tensors="pt", sampling_rate=16_000).input_features
@@ -2448,7 +2463,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
         model = model.to(torch_device)

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)

         input_features = processor(
@@ -2484,7 +2499,9 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         prompt = "Mr. Kilter, Brionno."  # let's force Quilter -> Kilter, Brion -> Brionno
         prompt_ids = processor.get_prompt_ids(prompt, return_tensors="pt").to(torch_device)

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:-1]")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:-1]", trust_remote_code=True
+        )
         one_audio = np.concatenate([x["array"] for x in ds["audio"]], dtype=np.float32)
         first_text = ds[0]["text"].lower()
@@ -2535,7 +2552,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
         model = model.to(torch_device)

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)

         input_features = processor(
@@ -2568,7 +2585,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
         model = model.to(torch_device)

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)

         input_features = processor(
@@ -2610,7 +2627,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
         model = model.to(torch_device)

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
         audios = []
         audios.append(one_audio[110000:])
@@ -2664,7 +2681,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
         model = model.to(torch_device)

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
         audios = []
         audios.append(one_audio[110000:])
@@ -69,7 +69,9 @@ class AudioClassificationPipelineTests(unittest.TestCase):
         import datasets

         # test with a local file
-        dataset = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        dataset = datasets.load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         audio = dataset[0]["audio"]["array"]
         output = audio_classifier(audio)
         self.assertEqual(
@@ -115,7 +117,7 @@ class AudioClassificationPipelineTests(unittest.TestCase):
         model = "superb/wav2vec2-base-superb-ks"

         audio_classifier = pipeline("audio-classification", model=model)
-        dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test")
+        dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test", trust_remote_code=True)

         audio = np.array(dataset[3]["speech"], dtype=np.float32)
         output = audio_classifier(audio, top_k=4)
@@ -206,7 +206,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @require_torch
     @require_pyctcdecode
     def test_large_model_pt_with_lm(self):
-        dataset = load_dataset("Narsil/asr_dummy", streaming=True)
+        dataset = load_dataset("Narsil/asr_dummy", streaming=True, trust_remote_code=True)
         third_item = next(iter(dataset["test"].skip(3)))
         filename = third_item["file"]
@@ -296,7 +296,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         output = speech_recognizer(waveform)
         self.assertEqual(output, {"text": ""})

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
@@ -313,7 +315,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         output = speech_recognizer(waveform)
         self.assertEqual(output, {"text": ""})

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": "a man said to the universe sir i exist"})
@@ -328,7 +332,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             chunk_length_s=8,
             stride_length_s=1,
         )
-        data = load_dataset("librispeech_asr", "clean", split="test", streaming=True)
+        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
         sample = next(iter(data))
         pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language="en", task="transcribe")
@@ -371,7 +375,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             task="automatic-speech-recognition",
             model="openai/whisper-tiny.en",
         )
-        data = load_dataset("librispeech_asr", "clean", split="test", streaming=True)
+        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
         samples = [next(iter(data)) for _ in range(8)]
         audio = np.concatenate([sample["audio"]["array"] for sample in samples])
@@ -488,7 +492,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="openai/whisper-tiny",
             framework="pt",
         )
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": " A man said to the universe, Sir, I exist."})
@@ -663,7 +669,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     @require_torch
     def test_whisper_timestamp_prediction(self):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         array = np.concatenate(
             [ds[40]["audio"]["array"], ds[41]["audio"]["array"], ds[42]["audio"]["array"], ds[43]["audio"]["array"]]
         )
@@ -761,7 +769,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     @require_torch
     def test_whisper_large_timestamp_prediction(self):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         array = np.concatenate(
             [ds[40]["audio"]["array"], ds[41]["audio"]["array"], ds[42]["audio"]["array"], ds[43]["audio"]["array"]]
         )
@@ -855,7 +865,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             chunk_length_s=3,
             return_timestamps="word",
         )
-        data = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        data = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         sample = data[0]["audio"]

         # not the same output as test_simple_whisper_asr because of chunking
@@ -898,7 +910,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="openai/whisper-large-v3",
             return_timestamps="word",
         )
-        data = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        data = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         sample = data[0]["audio"]

         # not the same output as test_simple_whisper_asr because of chunking
@@ -943,7 +957,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             framework="pt",
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": 'Ein Mann sagte zum Universum : " Sir, ich existiert! "'})
@@ -961,7 +977,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         output = asr(waveform)
         self.assertEqual(output, {"text": ""})

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = asr(filename)
         self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
@@ -987,7 +1005,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         output = asr(waveform)
         self.assertEqual(output, {"text": "(Applausi)"})

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = asr(filename)
         self.assertEqual(output, {"text": "Un uomo disse all'universo: \"Signore, io esisto."})
@@ -1007,7 +1027,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="openai/whisper-tiny.en",
             framework="pt",
         )
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         filename = ds[0]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(
@@ -1076,7 +1098,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="openai/whisper-large",
             framework="pt",
         )
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": " A man said to the universe, Sir, I exist."})
@@ -1111,7 +1135,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="openai/whisper-tiny.en",
             framework="pt",
         )
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         filename = ds[0]["file"]

         # 1. English-only model compatible with no language argument
@@ -1144,7 +1170,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     def test_speculative_decoding_whisper_non_distil(self):
         # Load data:
-        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
+        dataset = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
+        )
         sample = dataset[0]["audio"]

         # Load model:
@@ -1188,7 +1216,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     def test_speculative_decoding_whisper_distil(self):
         # Load data:
-        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
+        dataset = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
+        )
         sample = dataset[0]["audio"]

         # Load model:
@@ -1240,7 +1270,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             framework="pt",
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": "A man said to the universe: “Sir, I exist."})
@@ -1256,7 +1288,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             framework="pt",
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": "Ein Mann sagte zu dem Universum, Sir, ich bin da."})
@@ -1273,7 +1307,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             framework="pt",
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
@@ -1290,7 +1326,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             framework="pt",
         )

-        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        dataset = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         sample = dataset[0]["audio"]
         output = speech_recognizer(sample)
@@ -1307,7 +1345,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             framework="pt",
             chunk_length_s=10.0,
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         audio = ds[40]["audio"]["array"]

         n_repeats = 2
@@ -1323,7 +1363,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="hf-internal-testing/tiny-random-wav2vec2",
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         # Take short audio to keep the test readable
         audio = ds[40]["audio"]["array"][:800]
@@ -1367,7 +1409,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             chunk_length_s=10.0,
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         audio = ds[40]["audio"]["array"]

         n_repeats = 2
@@ -1395,7 +1439,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         )
         self.assertEqual(speech_recognizer.type, "ctc_with_lm")

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         audio = ds[40]["audio"]["array"]

         n_repeats = 2
@@ -1423,7 +1469,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         )
         self.assertEqual(speech_recognizer.type, "ctc_with_lm")

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         audio = ds[40]["audio"]["array"]

         n_repeats = 2
@@ -1507,7 +1555,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             device=torch_device,
         )

-        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        dataset = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         sample = dataset[0]["audio"]

         result = pipe(sample, generate_kwargs={"tgt_lang": "eng"})
@@ -1530,7 +1580,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             chunk_length_s=10.0,
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         audio = ds[40]["audio"]["array"]

         n_repeats = 10
@@ -1642,7 +1694,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="patrickvonplaten/wav2vec2-base-100h-with-lm",
             chunk_length_s=10.0,
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         audio = ds[40]["audio"]["array"]

         n_repeats = 10
@@ -840,7 +840,9 @@ class CustomPipelineTest(unittest.TestCase):
     def test_chunk_pipeline_batching_single_file(self):
         # Make sure we have cached the pipeline.
         pipe = pipeline(model="hf-internal-testing/tiny-random-Wav2Vec2ForCTC")
-        ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = datasets.load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         audio = ds[40]["audio"]["array"]
         pipe = pipeline(model="hf-internal-testing/tiny-random-Wav2Vec2ForCTC")