Unverified Commit f83c6f1d authored by Sanchit Gandhi, committed by GitHub

Remove `trust_remote_code` when loading Libri Dummy (#31748)

* [whisper integration] use parquet dataset for testing

* propagate to others

* more propagation

* last one
parent 3aefb4ec
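
The change is mechanical: the hf-internal-testing/librispeech_asr_dummy repo now hosts plain parquet files instead of a dataset loading script, so every call site drops trust_remote_code=True and collapses to a one-line load_dataset call. A minimal before/after sketch of the pattern applied throughout (standalone, outside any test scaffolding):

from datasets import load_dataset

# Before: the dummy repo was backed by a loading script, so callers had to
# opt in to executing remote code.
# ds = load_dataset(
#     "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
# )

# After: plain parquet data loads via the default, script-free code path.
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")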
@@ -327,9 +327,7 @@ class UnivNetFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.TestCase):
         self.assertTrue(pt_processed.input_features.dtype == torch.float32)

     def _load_datasamples(self, num_samples):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         ds = ds.cast_column("audio", Audio(sampling_rate=self.feat_extract_tester.sampling_rate))
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
...
@@ -216,9 +216,7 @@ class UnivNetModelIntegrationTests(unittest.TestCase):
         torch.cuda.empty_cache()

     def _load_datasamples(self, num_samples, sampling_rate=24000):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         ds = ds.cast_column("audio", Audio(sampling_rate=sampling_rate))
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
...
@@ -489,9 +489,7 @@ class FlaxWav2Vec2UtilsTest(unittest.TestCase):
 @slow
 class FlaxWav2Vec2ModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
...
@@ -716,9 +716,7 @@ class TFWav2Vec2ModelIntegrationTest(unittest.TestCase):
         gc.collect()

     def _load_datasamples(self, num_samples):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
...
@@ -1464,9 +1464,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         backend_empty_cache(torch_device)

     def _load_datasamples(self, num_samples):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
...
@@ -855,9 +855,7 @@ class Wav2Vec2BertUtilsTest(unittest.TestCase):
 @slow
 class Wav2Vec2BertModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)])
         speech_samples = speech_samples[:num_samples]["audio"]
...
@@ -863,9 +863,7 @@ class Wav2Vec2ConformerUtilsTest(unittest.TestCase):
 @slow
 class Wav2Vec2ConformerModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)])
         speech_samples = speech_samples[:num_samples]["audio"]
...
@@ -491,9 +491,7 @@ class WavLMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
 @slow
 class WavLMModelIntegrationTest(unittest.TestCase):
     def _load_datasamples(self, num_samples):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").filter(
             lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
...
@@ -215,9 +215,7 @@ class WhisperFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.TestCase):
         self.assertTrue(pt_processed.input_features.dtype == torch.float32)

     def _load_datasamples(self, num_samples):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
...
@@ -410,9 +410,7 @@ class FlaxWhisperModelIntegrationTest(unittest.TestCase):
         return WhisperProcessor.from_pretrained("openai/whisper-base")

     def _load_datasamples(self, num_samples):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
...
@@ -704,7 +704,7 @@ class TFWhisperModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
 def _load_datasamples(num_samples):
-    ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True)
+    ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
     # automatic decoding with librispeech
     speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
...
@@ -1835,9 +1835,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         return WhisperProcessor.from_pretrained("openai/whisper-base")

     def _load_datasamples(self, num_samples):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         # automatic decoding with librispeech
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
@@ -2718,9 +2716,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         )
         assistant_model.to(torch_device)

-        dataset = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         sample = dataset[0]["audio"]

         input_features = processor(sample["array"], return_tensors="pt", sampling_rate=16_000).input_features
@@ -2769,9 +2765,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         )
         assistant_model.to(torch_device)

-        dataset = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         sample = dataset[0]["audio"]

         input_features = processor(sample["array"], return_tensors="pt", sampling_rate=16_000).input_features
@@ -2812,7 +2806,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
         model = model.to(torch_device)

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)

         input_features = processor(
@@ -2848,9 +2842,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         prompt = "Mr. Kilter, Brionno."  # let's force Quilter -> Kilter, Brion -> Brionno
         prompt_ids = processor.get_prompt_ids(prompt, return_tensors="pt").to(torch_device)

-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:-1]", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:-1]")
         one_audio = np.concatenate([x["array"] for x in ds["audio"]], dtype=np.float32)
         first_text = ds[0]["text"].lower()
@@ -2901,7 +2893,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
         model = model.to(torch_device)

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)

         input_features = processor(
@@ -2983,7 +2975,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
         model = model.to(torch_device)

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)

         input_features = processor(
@@ -3025,7 +3017,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
         model = model.to(torch_device)

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
         audios = []
         audios.append(one_audio[110000:])
@@ -3079,7 +3071,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
         model = model.to(torch_device)

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
         one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
         audios = []
         audios.append(one_audio[110000:])
...
@@ -71,9 +71,7 @@ class AudioClassificationPipelineTests(unittest.TestCase):
         import datasets

         # test with a local file
-        dataset = datasets.load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        dataset = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         audio = dataset[0]["audio"]["array"]
         output = audio_classifier(audio)
         self.assertEqual(
...
@@ -294,9 +294,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         output = speech_recognizer(waveform)
         self.assertEqual(output, {"text": ""})

-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
@@ -313,9 +311,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         output = speech_recognizer(waveform)
         self.assertEqual(output, {"text": ""})

-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": "a man said to the universe sir i exist"})
@@ -545,9 +541,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="openai/whisper-tiny",
             framework="pt",
         )
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": " A man said to the universe, Sir, I exist."})
@@ -722,9 +716,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     @require_torch
     def test_whisper_timestamp_prediction(self):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         array = np.concatenate(
             [ds[40]["audio"]["array"], ds[41]["audio"]["array"], ds[42]["audio"]["array"], ds[43]["audio"]["array"]]
         )
@@ -822,9 +814,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     @require_torch
     def test_whisper_large_timestamp_prediction(self):
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         array = np.concatenate(
             [ds[40]["audio"]["array"], ds[41]["audio"]["array"], ds[42]["audio"]["array"], ds[43]["audio"]["array"]]
         )
@@ -918,9 +908,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             chunk_length_s=3,
             return_timestamps="word",
         )
-        data = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        data = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         sample = data[0]["audio"]

         # not the same output as test_simple_whisper_asr because of chunking
@@ -963,9 +951,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="openai/whisper-large-v3",
             return_timestamps="word",
         )
-        data = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        data = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         sample = data[0]["audio"]

         # not the same output as test_simple_whisper_asr because of chunking
@@ -1010,9 +996,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             framework="pt",
         )

-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": 'Ein Mann sagte zum Universum : " Sir, ich existiert! "'})
@@ -1030,9 +1014,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         output = asr(waveform)
         self.assertEqual(output, {"text": ""})

-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = asr(filename)
         self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
@@ -1058,9 +1040,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         output = asr(waveform)
         self.assertEqual(output, {"text": "(Applausi)"})

-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = asr(filename)
         self.assertEqual(output, {"text": "Un uomo disse all'universo: \"Signore, io esisto."})
@@ -1080,9 +1060,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="openai/whisper-tiny.en",
             framework="pt",
         )
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         filename = ds[0]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(
@@ -1151,9 +1129,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="openai/whisper-large",
             framework="pt",
         )
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": " A man said to the universe, Sir, I exist."})
@@ -1188,9 +1164,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="openai/whisper-tiny.en",
             framework="pt",
         )
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         filename = ds[0]["file"]

         # 1. English-only model compatible with no language argument
@@ -1323,9 +1297,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             framework="pt",
         )

-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": "A man said to the universe: “Sir, I exist."})
@@ -1341,9 +1313,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             framework="pt",
         )

-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
         self.assertEqual(output, {"text": "Ein Mann sagte zu dem Universum, Sir, ich bin da."})
@@ -1360,9 +1330,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             framework="pt",
         )

-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         filename = ds[40]["file"]
         output = speech_recognizer(filename)
@@ -1379,9 +1347,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             framework="pt",
         )

-        dataset = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         sample = dataset[0]["audio"]

         output = speech_recognizer(sample)
@@ -1398,9 +1364,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             chunk_length_s=10.0,
         )

-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         audio = ds[40]["audio"]["array"]
         n_repeats = 2
@@ -1416,9 +1380,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="hf-internal-testing/tiny-random-wav2vec2",
         )

-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")

         # Take short audio to keep the test readable
         audio = ds[40]["audio"]["array"][:800]
@@ -1462,9 +1424,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             chunk_length_s=10.0,
         )

-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         audio = ds[40]["audio"]["array"]
         n_repeats = 2
@@ -1492,9 +1452,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         )
         self.assertEqual(speech_recognizer.type, "ctc_with_lm")

-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         audio = ds[40]["audio"]["array"]
         n_repeats = 2
@@ -1522,9 +1480,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         )
         self.assertEqual(speech_recognizer.type, "ctc_with_lm")

-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         audio = ds[40]["audio"]["array"]
         n_repeats = 2
@@ -1608,9 +1564,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             device=torch_device,
         )

-        dataset = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         sample = dataset[0]["audio"]

         result = pipe(sample, generate_kwargs={"tgt_lang": "eng"})
@@ -1633,9 +1587,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
            chunk_length_s=10.0,
         )

-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         audio = ds[40]["audio"]["array"]
         n_repeats = 10
@@ -1747,9 +1699,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             model="patrickvonplaten/wav2vec2-base-100h-with-lm",
             chunk_length_s=10.0,
         )
-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         audio = ds[40]["audio"]["array"]
         n_repeats = 10
...
@@ -840,9 +840,7 @@ class CustomPipelineTest(unittest.TestCase):
     def test_chunk_pipeline_batching_single_file(self):
         # Make sure we have cached the pipeline.
         pipe = pipeline(model="hf-internal-testing/tiny-random-Wav2Vec2ForCTC")
-        ds = datasets.load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        ).sort("id")
+        ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
         audio = ds[40]["audio"]["array"]

         pipe = pipeline(model="hf-internal-testing/tiny-random-Wav2Vec2ForCTC")
...
@@ -262,9 +262,7 @@ class AudioUtilsFunctionTester(unittest.TestCase):
     def _load_datasamples(self, num_samples):
         from datasets import load_dataset

-        ds = load_dataset(
-            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
-        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
         speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
         return [x["array"] for x in speech_samples]
...
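
Taken together, the hunks all reshape the same helper. A consolidated sketch of the post-change _load_datasamples pattern (assembled from the diff; the free-function form and the 16 kHz default are illustrative, since the tests define it as a method and some skip the resampling step):

from datasets import Audio, load_dataset

def _load_datasamples(num_samples, sampling_rate=16_000):
    # Parquet-backed dummy split: no loading script, no trust_remote_code.
    ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
    # Decode and resample the audio column on access.
    ds = ds.cast_column("audio", Audio(sampling_rate=sampling_rate))
    # Deterministic order, then keep the first num_samples waveforms.
    speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
    return [x["array"] for x in speech_samples]

The pipeline tests use the same one-line call, additionally chaining .sort("id") and indexing a fixed row (for example ds[40]) to keep their expected transcripts stable.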