"tests/rag/test_retrieval_rag.py" did not exist on "f4e04cd2c671312c8f750f55cf9f51753718f0df"
Unverified commit ca57b450, authored by Patrick von Platen, committed by GitHub
Browse files

[Unispeech] Fix slow tests (#15818)

* remove soundfile old way of loading audio

* Adapt slow test
parent 35ecf99c
...@@ -538,21 +538,13 @@ class UniSpeechRobustModelTest(ModelTesterMixin, unittest.TestCase): ...@@ -538,21 +538,13 @@ class UniSpeechRobustModelTest(ModelTesterMixin, unittest.TestCase):
@slow @slow
class UniSpeechModelIntegrationTest(unittest.TestCase): class UniSpeechModelIntegrationTest(unittest.TestCase):
def _load_datasamples(self, num_samples):
    """Return the first `num_samples` LibriSpeech dummy clips as raw audio arrays.

    Relies on the `datasets` library's automatic audio decoding (the "audio"
    column) instead of the older manual loading via `soundfile` — the change
    this commit makes ("remove soundfile old way of loading audio").

    Args:
        num_samples (int): number of validation clips to load; assumed < 10,
            since ids are built as ``1272-141231-000{i}`` — TODO confirm.

    Returns:
        list: one decoded waveform (numpy array) per selected sample,
        ordered by sample id.
    """
    ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
    # automatic decoding with librispeech: the "audio" column yields
    # {"array": ..., "sampling_rate": ...} dicts, so no soundfile needed
    speech_samples = ds.sort("id").filter(
        lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
    )[:num_samples]["audio"]

    return [x["array"] for x in speech_samples]
def _load_superb(self, task, num_samples): def _load_superb(self, task, num_samples):
......
...@@ -800,21 +800,13 @@ class UniSpeechSatRobustModelTest(ModelTesterMixin, unittest.TestCase): ...@@ -800,21 +800,13 @@ class UniSpeechSatRobustModelTest(ModelTesterMixin, unittest.TestCase):
@slow @slow
class UniSpeechSatModelIntegrationTest(unittest.TestCase): class UniSpeechSatModelIntegrationTest(unittest.TestCase):
def _load_datasamples(self, num_samples):
    """Return the first `num_samples` LibriSpeech dummy clips as raw audio arrays.

    Relies on the `datasets` library's automatic audio decoding (the "audio"
    column) instead of the older manual loading via `soundfile` — the change
    this commit makes ("remove soundfile old way of loading audio").

    Args:
        num_samples (int): number of validation clips to load; assumed < 10,
            since ids are built as ``1272-141231-000{i}`` — TODO confirm.

    Returns:
        list: one decoded waveform (numpy array) per selected sample,
        ordered by sample id.
    """
    ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
    # automatic decoding with librispeech: the "audio" column yields
    # {"array": ..., "sampling_rate": ...} dicts, so no soundfile needed
    speech_samples = ds.sort("id").filter(
        lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
    )[:num_samples]["audio"]

    return [x["array"] for x in speech_samples]
def _load_superb(self, task, num_samples): def _load_superb(self, task, num_samples):
ds = load_dataset("anton-l/superb_dummy", task, split="test") ds = load_dataset("anton-l/superb_dummy", task, split="test")
...@@ -865,10 +857,10 @@ class UniSpeechSatModelIntegrationTest(unittest.TestCase): ...@@ -865,10 +857,10 @@ class UniSpeechSatModelIntegrationTest(unittest.TestCase):
# fmt: off # fmt: off
expected_hidden_states_slice = torch.tensor( expected_hidden_states_slice = torch.tensor(
[[[-0.1172, -0.0797], [[[-0.1192, -0.0825],
[-0.0012, 0.0213]], [-0.0012, 0.0235]],
[[-0.1225, -0.1277], [[-0.1240, -0.1332],
[-0.0668, -0.0585]]], [-0.0658, -0.0565]]],
device=torch_device, device=torch_device,
) )
# fmt: on # fmt: on
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment