Unverified Commit a14b055b authored by Albert Villanova del Moral, committed by GitHub

Pass datasets trust_remote_code (#31406)

* Pass datasets trust_remote_code

* Pass trust_remote_code in more tests

* Add trust_remote_dataset_code arg to some tests

* Revert "Temporarily pin datasets upper version to fix CI"

This reverts commit b7672826.

* Pass trust_remote_code in librispeech_asr_dummy docstrings

* Revert "Pin datasets<2.20.0 for examples"

This reverts commit 833fc17a.

* Pass trust_remote_code to all examples

* Revert "Add trust_remote_dataset_code arg to some tests" to research_projects

* Pass trust_remote_code to tests

* Pass trust_remote_code to docstrings

* Fix flax examples tests requirements

* Pass trust_remote_dataset_code arg to tests

* Replace trust_remote_dataset_code with trust_remote_code in one example

* Fix duplicate trust_remote_code

* Replace args.trust_remote_dataset_code with args.trust_remote_code

* Replace trust_remote_dataset_code with trust_remote_code in parser

* Replace trust_remote_dataset_code with trust_remote_code in dataclasses

* Replace trust_remote_dataset_code with trust_remote_code arg
parent 485fd814
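
The change is mechanical but broad: with the datasets 2.20 release the trust_remote_code default flipped to False, so every load_dataset call that targets a script-based Hub dataset now has to opt in explicitly, and the example scripts expose a single trust_remote_code flag through their argument dataclasses instead of the interim trust_remote_dataset_code. A minimal sketch of both halves of the pattern (the DataTrainingArguments name and help text below are illustrative, written in the style of the example scripts, not copied from any one of them):

from dataclasses import dataclass, field

from datasets import load_dataset


@dataclass
class DataTrainingArguments:
    # Parsed with HfArgumentParser in the example scripts; only the relevant field is shown here.
    trust_remote_code: bool = field(
        default=False,
        metadata={
            "help": (
                "Whether to trust datasets defined on the Hub by a loading script. Set to True only for "
                "repositories you trust, since the script is executed on your local machine."
            )
        },
    )


# Tests and docstrings pass the flag directly at the call site:
ds = load_dataset(
    "hf-internal-testing/librispeech_asr_dummy",
    "clean",
    split="validation",
    trust_remote_code=True,
)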
@@ -548,7 +548,9 @@ class UniSpeechRobustModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.T
 @slow
 class UniSpeechModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
@@ -557,7 +559,7 @@ class UniSpeechModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test")
+        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
         return ds[:num_samples]
@@ -812,7 +812,9 @@ class UniSpeechSatRobustModelTest(ModelTesterMixin, unittest.TestCase):
 @slow
 class UniSpeechSatModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
@@ -821,7 +823,7 @@ class UniSpeechSatModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test")
+        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
         return ds[:num_samples]
@@ -327,7 +327,9 @@ class UnivNetFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.
         self.assertTrue(pt_processed.input_features.dtype == torch.float32)

     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         ds = ds.cast_column("audio", Audio(sampling_rate=self.feat_extract_tester.sampling_rate))
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
@@ -220,7 +220,9 @@ class UnivNetModelIntegrationTests(unittest.TestCase):
         torch.cuda.empty_cache()

     def _load_datasamples(self, num_samples, sampling_rate=24000):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         ds = ds.cast_column("audio", Audio(sampling_rate=sampling_rate))
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
@@ -637,7 +637,7 @@ class ViltModelIntegrationTest(unittest.TestCase):
         processor = self.default_processor

-        dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="test")
+        dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="test", trust_remote_code=True)

         image1 = Image.open(dataset[0]["file"]).convert("RGB")
         image2 = Image.open(dataset[1]["file"]).convert("RGB")
@@ -815,7 +815,7 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
     def test_inference_handwritten(self):
         model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten").to(torch_device)

-        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test")
+        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test", trust_remote_code=True)
         image = Image.open(dataset[0]["file"]).convert("RGB")

         processor = self.default_processor
@@ -840,7 +840,7 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
     def test_inference_printed(self):
         model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed").to(torch_device)

-        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test")
+        dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test", trust_remote_code=True)
         image = Image.open(dataset[1]["file"]).convert("RGB")

         processor = self.default_processor
@@ -72,7 +72,7 @@ def _test_wav2vec2_with_lm_invalid_pool(in_queue, out_queue, timeout):
     try:
         _ = in_queue.get(timeout=timeout)

-        ds = load_dataset("common_voice", "es", split="test", streaming=True)
+        ds = load_dataset("legacy-datasets/common_voice", "es", split="test", streaming=True, trust_remote_code=True)
         sample = next(iter(ds))
         resampled_audio = librosa.resample(sample["audio"]["array"], 48_000, 16_000)
@@ -489,7 +489,9 @@ class FlaxWav2Vec2UtilsTest(unittest.TestCase):
 @slow
 class FlaxWav2Vec2ModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
@@ -585,7 +587,7 @@ class FlaxWav2Vec2ModelIntegrationTest(unittest.TestCase):
     @require_pyctcdecode
     @require_librosa
     def test_wav2vec2_with_lm(self):
-        ds = load_dataset("common_voice", "es", split="test", streaming=True)
+        ds = load_dataset("legacy-datasets/common_voice", "es", split="test", streaming=True, trust_remote_code=True)
         sample = next(iter(ds))
         resampled_audio = librosa.resample(sample["audio"]["array"], 48_000, 16_000)
@@ -604,7 +606,7 @@ class FlaxWav2Vec2ModelIntegrationTest(unittest.TestCase):
     @require_pyctcdecode
     @require_librosa
     def test_wav2vec2_with_lm_pool(self):
-        ds = load_dataset("common_voice", "es", split="test", streaming=True)
+        ds = load_dataset("legacy-datasets/common_voice", "es", split="test", streaming=True, trust_remote_code=True)
         sample = next(iter(ds))
         resampled_audio = librosa.resample(sample["audio"]["array"], 48_000, 16_000)
@@ -716,7 +716,9 @@ class TFWav2Vec2ModelIntegrationTest(unittest.TestCase):
         gc.collect()

     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
@@ -725,7 +727,7 @@ class TFWav2Vec2ModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test")
+        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
         return ds[:num_samples]
@@ -101,7 +101,9 @@ def _test_wav2vec2_with_lm_invalid_pool(in_queue, out_queue, timeout):
     try:
         _ = in_queue.get(timeout=timeout)

-        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
+        ds = load_dataset(
+            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
+        )
         sample = next(iter(ds))
         resampled_audio = torchaudio.functional.resample(
@@ -1468,7 +1470,9 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         backend_empty_cache(torch_device)

     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
@@ -1477,7 +1481,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test")
+        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
         return ds[:num_samples]
@@ -1843,7 +1847,9 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
     @require_pyctcdecode
     @require_torchaudio
     def test_wav2vec2_with_lm(self):
-        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
+        ds = load_dataset(
+            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
+        )
         sample = next(iter(ds))
         resampled_audio = torchaudio.functional.resample(
@@ -1867,7 +1873,9 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
     @require_pyctcdecode
     @require_torchaudio
     def test_wav2vec2_with_lm_pool(self):
-        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
+        ds = load_dataset(
+            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
+        )
         sample = next(iter(ds))
         resampled_audio = torchaudio.functional.resample(
@@ -1965,7 +1973,9 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         LANG_MAP = {"it": "ita", "es": "spa", "fr": "fra", "en": "eng"}

         def run_model(lang):
-            ds = load_dataset("mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True)
+            ds = load_dataset(
+                "mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True, trust_remote_code=True
+            )
             sample = next(iter(ds))

             wav2vec2_lang = LANG_MAP[lang]
@@ -855,7 +855,9 @@ class Wav2Vec2BertUtilsTest(unittest.TestCase):
 @slow
 class Wav2Vec2BertModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)])
         speech_samples = speech_samples[:num_samples]["audio"]
@@ -866,7 +866,9 @@ class Wav2Vec2ConformerUtilsTest(unittest.TestCase):
 @slow
 class Wav2Vec2ConformerModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)])
         speech_samples = speech_samples[:num_samples]["audio"]
@@ -463,7 +463,9 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
     def test_word_time_stamp_integration(self):
         import torch

-        ds = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
+        ds = load_dataset(
+            "mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True
+        )
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
         ds_iter = iter(ds)
         sample = next(ds_iter)
@@ -494,7 +494,9 @@ class WavLMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
 @slow
 class WavLMModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
@@ -503,7 +505,7 @@ class WavLMModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test")
+        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
         return ds[:num_samples]
@@ -215,7 +215,9 @@ class WhisperFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.
         self.assertTrue(pt_processed.input_features.dtype == torch.float32)

     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
@@ -410,7 +410,9 @@ class FlaxWhisperModelIntegrationTest(unittest.TestCase):
         return WhisperProcessor.from_pretrained("openai/whisper-base")

     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
@@ -561,7 +563,7 @@ class FlaxWhisperModelIntegrationTest(unittest.TestCase):
         processor = WhisperProcessor.from_pretrained("openai/whisper-large")
         model = FlaxWhisperForConditionalGeneration.from_pretrained("openai/whisper-large", from_pt=True)

-        ds = load_dataset("common_voice", "ja", split="test", streaming=True)
+        ds = load_dataset("legacy-datasets/common_voice", "ja", split="test", streaming=True, trust_remote_code=True)
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
         input_speech = next(iter(ds))["audio"]["array"]
         input_features = processor.feature_extractor(raw_speech=input_speech, return_tensors="np")
@@ -704,7 +704,7 @@ class TFWhisperModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestC
 def _load_datasamples(num_samples):
-    ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+    ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True)
     # automatic decoding with librispeech
     speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
@@ -795,7 +795,7 @@ def _test_large_generation_multilingual(in_queue, out_queue, timeout):
         processor = WhisperProcessor.from_pretrained("openai/whisper-large")
         model = TFWhisperForConditionalGeneration.from_pretrained("openai/whisper-large")

-        ds = load_dataset("common_voice", "ja", split="test", streaming=True)
+        ds = load_dataset("legacy-datasets/common_voice", "ja", split="test", streaming=True, trust_remote_code=True)
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
         input_speech = next(iter(ds))["audio"]["array"]
         input_features = processor.feature_extractor(raw_speech=input_speech, return_tensors="tf").input_features
@@ -1552,7 +1552,9 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         return WhisperProcessor.from_pretrained("openai/whisper-base")

     def _load_datasamples(self, num_samples):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
@@ -1763,7 +1765,9 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
         model.to(torch_device)

-        ds = load_dataset("facebook/multilingual_librispeech", "german", split="test", streaming=True)
+        ds = load_dataset(
+            "facebook/multilingual_librispeech", "german", split="test", streaming=True, trust_remote_code=True
+        )
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
         input_speech = next(iter(ds))["audio"]["array"]
@@ -1830,7 +1834,14 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model.to(torch_device)

         token = os.getenv("HF_HUB_READ_TOKEN", True)
-        ds = load_dataset("mozilla-foundation/common_voice_6_1", "ja", split="test", streaming=True, token=token)
+        ds = load_dataset(
+            "mozilla-foundation/common_voice_6_1",
+            "ja",
+            split="test",
+            streaming=True,
+            token=token,
+            trust_remote_code=True,
+        )
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
         input_speech = next(iter(ds))["audio"]["array"]
@@ -2358,7 +2369,9 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         )
         assistant_model.to(torch_device)

-        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        dataset = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         sample = dataset[0]["audio"]

         input_features = processor(sample["array"], return_tensors="pt", sampling_rate=16_000).input_features
@@ -2407,7 +2420,9 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         )
         assistant_model.to(torch_device)

-        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        dataset = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         sample = dataset[0]["audio"]

         input_features = processor(sample["array"], return_tensors="pt", sampling_rate=16_000).input_features
@@ -2448,7 +2463,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
         model = model.to(torch_device)

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)

         input_features = processor(
@@ -2484,7 +2499,9 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         prompt = "Mr. Kilter, Brionno."  # let's force Quilter -> Kilter, Brion -> Brionno
         prompt_ids = processor.get_prompt_ids(prompt, return_tensors="pt").to(torch_device)

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:-1]")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:-1]", trust_remote_code=True
+        )
         one_audio = np.concatenate([x["array"] for x in ds["audio"]], dtype=np.float32)
         first_text = ds[0]["text"].lower()
@@ -2535,7 +2552,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
         model = model.to(torch_device)

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)

         input_features = processor(
@@ -2568,7 +2585,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
         model = model.to(torch_device)

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)

         input_features = processor(
@@ -2610,7 +2627,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
         model = model.to(torch_device)

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
         audios = []
         audios.append(one_audio[110000:])
@@ -2664,7 +2681,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
         model = model.to(torch_device)

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
         audios = []
         audios.append(one_audio[110000:])
@@ -69,7 +69,9 @@ class AudioClassificationPipelineTests(unittest.TestCase):
         import datasets

         # test with a local file
-        dataset = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        dataset = datasets.load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         audio = dataset[0]["audio"]["array"]
         output = audio_classifier(audio)
         self.assertEqual(
@@ -115,7 +117,7 @@ class AudioClassificationPipelineTests(unittest.TestCase):
         model = "superb/wav2vec2-base-superb-ks"

         audio_classifier = pipeline("audio-classification", model=model)
-        dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test")
+        dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test", trust_remote_code=True)

         audio = np.array(dataset[3]["speech"], dtype=np.float32)
         output = audio_classifier(audio, top_k=4)
@@ -206,7 +206,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @require_torch
     @require_pyctcdecode
     def test_large_model_pt_with_lm(self):
-        dataset = load_dataset("Narsil/asr_dummy", streaming=True)
+        dataset = load_dataset("Narsil/asr_dummy", streaming=True, trust_remote_code=True)
         third_item = next(iter(dataset["test"].skip(3)))
         filename = third_item["file"]
@@ -296,7 +296,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         output = speech_recognizer(waveform)
         self.assertEqual(output, {"text": ""})

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
@@ -313,7 +315,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         output = speech_recognizer(waveform)
         self.assertEqual(output, {"text": ""})

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": "a man said to the universe sir i exist"})
@@ -328,7 +332,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             chunk_length_s=8,
             stride_length_s=1,
         )
-        data = load_dataset("librispeech_asr", "clean", split="test", streaming=True)
+        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
         sample = next(iter(data))
         pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language="en", task="transcribe")
@@ -371,7 +375,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             task="automatic-speech-recognition",
             model="openai/whisper-tiny.en",
         )
-        data = load_dataset("librispeech_asr", "clean", split="test", streaming=True)
+        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
         samples = [next(iter(data)) for _ in range(8)]
         audio = np.concatenate([sample["audio"]["array"] for sample in samples])
@@ -488,7 +492,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="openai/whisper-tiny",
             framework="pt",
         )
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": " A man said to the universe, Sir, I exist."})
@@ -663,7 +669,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     @require_torch
     def test_whisper_timestamp_prediction(self):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         array = np.concatenate(
             [ds[40]["audio"]["array"], ds[41]["audio"]["array"], ds[42]["audio"]["array"], ds[43]["audio"]["array"]]
         )
@@ -761,7 +769,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     @require_torch
     def test_whisper_large_timestamp_prediction(self):
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         array = np.concatenate(
             [ds[40]["audio"]["array"], ds[41]["audio"]["array"], ds[42]["audio"]["array"], ds[43]["audio"]["array"]]
         )
@@ -855,7 +865,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             chunk_length_s=3,
             return_timestamps="word",
         )
-        data = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        data = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         sample = data[0]["audio"]

         # not the same output as test_simple_whisper_asr because of chunking
@@ -898,7 +910,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="openai/whisper-large-v3",
             return_timestamps="word",
         )
-        data = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        data = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         sample = data[0]["audio"]

         # not the same output as test_simple_whisper_asr because of chunking
@@ -943,7 +957,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             framework="pt",
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": 'Ein Mann sagte zum Universum : " Sir, ich existiert! "'})
@@ -961,7 +977,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         output = asr(waveform)
         self.assertEqual(output, {"text": ""})

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = asr(filename)
         self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
@@ -987,7 +1005,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         output = asr(waveform)
         self.assertEqual(output, {"text": "(Applausi)"})

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = asr(filename)
         self.assertEqual(output, {"text": "Un uomo disse all'universo: \"Signore, io esisto."})
@@ -1007,7 +1027,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="openai/whisper-tiny.en",
             framework="pt",
         )
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         filename = ds[0]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(
@@ -1076,7 +1098,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="openai/whisper-large",
             framework="pt",
         )
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": " A man said to the universe, Sir, I exist."})
@@ -1111,7 +1135,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="openai/whisper-tiny.en",
             framework="pt",
         )
-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         filename = ds[0]["file"]

         # 1. English-only model compatible with no language argument
@@ -1144,7 +1170,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     def test_speculative_decoding_whisper_non_distil(self):
         # Load data:
-        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
+        dataset = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
+        )
         sample = dataset[0]["audio"]

         # Load model:
@@ -1188,7 +1216,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     def test_speculative_decoding_whisper_distil(self):
         # Load data:
-        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
+        dataset = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
+        )
         sample = dataset[0]["audio"]

         # Load model:
@@ -1240,7 +1270,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             framework="pt",
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": "A man said to the universe: “Sir, I exist."})
@@ -1256,7 +1288,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             framework="pt",
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": "Ein Mann sagte zu dem Universum, Sir, ich bin da."})
@@ -1273,7 +1307,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             framework="pt",
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
@@ -1290,7 +1326,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             framework="pt",
         )

-        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        dataset = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         sample = dataset[0]["audio"]
         output = speech_recognizer(sample)
@@ -1307,7 +1345,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             framework="pt",
             chunk_length_s=10.0,
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         audio = ds[40]["audio"]["array"]

         n_repeats = 2
@@ -1323,7 +1363,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="hf-internal-testing/tiny-random-wav2vec2",
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         # Take short audio to keep the test readable
         audio = ds[40]["audio"]["array"][:800]
@@ -1367,7 +1409,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             chunk_length_s=10.0,
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         audio = ds[40]["audio"]["array"]

         n_repeats = 2
@@ -1395,7 +1439,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         )
         self.assertEqual(speech_recognizer.type, "ctc_with_lm")

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         audio = ds[40]["audio"]["array"]

         n_repeats = 2
@@ -1423,7 +1469,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         )
         self.assertEqual(speech_recognizer.type, "ctc_with_lm")

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         audio = ds[40]["audio"]["array"]

         n_repeats = 2
@@ -1507,7 +1555,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             device=torch_device,
         )

-        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        dataset = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
         sample = dataset[0]["audio"]

         result = pipe(sample, generate_kwargs={"tgt_lang": "eng"})
@@ -1530,7 +1580,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             chunk_length_s=10.0,
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         audio = ds[40]["audio"]["array"]

         n_repeats = 10
@@ -1642,7 +1694,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="patrickvonplaten/wav2vec2-base-100h-with-lm",
             chunk_length_s=10.0,
         )

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         audio = ds[40]["audio"]["array"]

         n_repeats = 10
@@ -840,7 +840,9 @@ class CustomPipelineTest(unittest.TestCase):
     def test_chunk_pipeline_batching_single_file(self):
         # Make sure we have cached the pipeline.
         pipe = pipeline(model="hf-internal-testing/tiny-random-Wav2Vec2ForCTC")
-        ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        ds = datasets.load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        ).sort("id")
         audio = ds[40]["audio"]["array"]
         pipe = pipeline(model="hf-internal-testing/tiny-random-Wav2Vec2ForCTC")