import os
from pathlib import Path

from torchaudio.datasets import librispeech
from torchaudio_unittest.common_utils import (
    get_whitenoise,
    normalize_wav,
    save_wav,
    TempDirMixin,
    TorchaudioTestCase,
)

# Used to generate a unique transcript for each dummy audio file
_NUMBERS = ["ZERO", "ONE", "TWO", "THREE", "FOUR", "FIVE", "SIX", "SEVEN", "EIGHT", "NINE"]


def get_mock_dataset(root_dir):
    """
    root_dir: directory to the mocked dataset
    """
    mocked_data = []
    dataset_dir = os.path.join(root_dir, librispeech.FOLDER_IN_ARCHIVE, librispeech.URL)
    os.makedirs(dataset_dir, exist_ok=True)
    sample_rate = 16000  # 16kHz
    seed = 0

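    # Mirror the LibriSpeech layout: <dataset_dir>/<speaker_id>/<chapter_id>/
    # containing <speaker>-<chapter>-<utterance>.wav files plus one
    # <speaker>-<chapter>.trans.txt transcript file per chapter.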
    for speaker_id in range(5):
        speaker_path = os.path.join(dataset_dir, str(speaker_id))
        os.makedirs(speaker_path, exist_ok=True)

        for chapter_id in range(3):
            chapter_path = os.path.join(speaker_path, str(chapter_id))
            os.makedirs(chapter_path, exist_ok=True)
            trans_content = []

            for utterance_id in range(10):
                filename = f"{speaker_id}-{chapter_id}-{utterance_id:04d}.wav"
                path = os.path.join(chapter_path, filename)

                transcript = " ".join([_NUMBERS[x] for x in [speaker_id, chapter_id, utterance_id]])
                trans_content.append(f"{speaker_id}-{chapter_id}-{utterance_id:04d} {transcript}")

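                # Each utterance gets distinct white-noise audio by advancing the seed.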
                data = get_whitenoise(sample_rate=sample_rate, duration=0.01, n_channels=1, dtype="float32", seed=seed)
                save_wav(path, data, sample_rate)
                sample = (normalize_wav(data), sample_rate, transcript, speaker_id, chapter_id, utterance_id)
                mocked_data.append(sample)

                seed += 1

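            # Write the per-chapter transcript file that the dataset loader parses.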
            trans_filename = f"{speaker_id}-{chapter_id}.trans.txt"
            trans_path = os.path.join(chapter_path, trans_filename)
            with open(trans_path, "w") as f:
                f.write("\n".join(trans_content))
    return mocked_data


class TestLibriSpeech(TempDirMixin, TorchaudioTestCase):
    backend = "default"

    root_dir = None
    samples = []

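    # Build the mocked dataset once; individual tests compare against these samples.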
    @classmethod
    def setUpClass(cls):
        cls.root_dir = cls.get_base_temp_dir()
        cls.samples = get_mock_dataset(cls.root_dir)

    @classmethod
    def tearDownClass(cls):
        # In case of test failure, restore the default audio extension
        librispeech.LIBRISPEECH._ext_audio = ".flac"

    def _test_librispeech(self, dataset):
        num_samples = 0
        for i, (data, sample_rate, transcript, speaker_id, chapter_id, utterance_id) in enumerate(dataset):
            self.assertEqual(data, self.samples[i][0], atol=5e-5, rtol=1e-8)
            assert sample_rate == self.samples[i][1]
            assert transcript == self.samples[i][2]
            assert speaker_id == self.samples[i][3]
            assert chapter_id == self.samples[i][4]
            assert utterance_id == self.samples[i][5]
            num_samples += 1

        assert num_samples == len(self.samples)
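        # Restore the default audio extension so other tests are unaffected.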
        librispeech.LIBRISPEECH._ext_audio = ".flac"

    def test_librispeech_str(self):
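        # The mocked files are wav, so point the loader at ".wav" instead of the default ".flac".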
        librispeech.LIBRISPEECH._ext_audio = ".wav"
        dataset = librispeech.LIBRISPEECH(self.root_dir)
        self._test_librispeech(dataset)

    def test_librispeech_path(self):
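        # Same as above, but pass the dataset root as a pathlib.Path.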
        librispeech.LIBRISPEECH._ext_audio = ".wav"
        dataset = librispeech.LIBRISPEECH(Path(self.root_dir))
        self._test_librispeech(dataset)