Refactor vctk unittest (#1134)

70fd2f3d · Aziz · GitHub · 93c3025f · 70fd2f3d
Unverified Commit 70fd2f3d authored Dec 30, 2020 by Aziz Committed by GitHub Dec 30, 2020
Hide whitespace changes
Inline Side-by-side

Showing with 50 additions and 42 deletions

test/torchaudio_unittest/datasets/vctk_test.py test/torchaudio_unittest/datasets/vctk_test.py +50 -42

No files found.
--- a/test/torchaudio_unittest/datasets/vctk_test.py
+++ b/test/torchaudio_unittest/datasets/vctk_test.py
@@ -12,7 +12,7 @@ from torchaudio_unittest.common_utils import (
 )

 # Used to generate a unique utterance for each dummy audio file
-UTTERANCE = [
+_UTTERANCE = [
    'Please call Stella',
    'Ask her to bring these things',
    'with her from the store',
@@ -27,6 +27,54 @@ UTTERANCE = [
 ]


+def get_mock_dataset(root_dir):
+    """Create a mock 'VCTK-Corpus-0.92' tree under root_dir and return the expected samples.
+    root_dir: root directory of the mocked data
+    """
+    mocked_samples = []
+    dataset_dir = os.path.join(root_dir, 'VCTK-Corpus-0.92')
+    os.makedirs(dataset_dir, exist_ok=True)
+    sample_rate = 48000
+    seed = 0  # bumped after every file so each get_whitenoise call gets its own seed
+
+    for speaker in range(225, 230):  # speaker ids 'p225' .. 'p229'
+        speaker_id = 'p' + str(speaker)
+        audio_dir = os.path.join(dataset_dir, 'wav48_silence_trimmed', speaker_id)
+        os.makedirs(audio_dir, exist_ok=True)
+
+        file_dir = os.path.join(dataset_dir, 'txt', speaker_id)  # transcripts live beside the audio tree
+        os.makedirs(file_dir, exist_ok=True)
+
+        for utterance_id in range(1, 11):  # ten utterances per speaker, ids 1 .. 10
+            filename = f'{speaker_id}_{utterance_id:03d}_mic2'
+            audio_file_path = os.path.join(audio_dir, filename + '.wav')
+
+            data = get_whitenoise(
+                sample_rate=sample_rate,
+                duration=0.01,
+                n_channels=1,
+                dtype='float32',
+                seed=seed
+            )
+            save_wav(audio_file_path, data, sample_rate)
+
+            txt_file_path = os.path.join(file_dir, filename[:-5] + '.txt')  # [:-5] drops the '_mic2' suffix
+            utterance = _UTTERANCE[utterance_id - 1]  # utterance ids are 1-based, the list is 0-based
+            with open(txt_file_path, 'w') as f:
+                f.write(utterance)
+
+            sample = (  # one entry per written file, in creation order
+                normalize_wav(data),
+                sample_rate,
+                utterance,
+                speaker_id,
+                utterance_id
+            )
+            mocked_samples.append(sample)
+            seed += 1
+    return mocked_samples
+
+
 class TestVCTK(TempDirMixin, TorchaudioTestCase):
    backend = 'default'

@@ -36,47 +84,7 @@ class TestVCTK(TempDirMixin, TorchaudioTestCase):
    @classmethod
    def setUpClass(cls):
        cls.root_dir = cls.get_base_temp_dir()
-        dataset_dir = os.path.join(cls.root_dir, 'VCTK-Corpus-0.92')
-        os.makedirs(dataset_dir, exist_ok=True)
-        sample_rate = 48000
-        seed = 0
-
-        for speaker in range(225, 230):
-            speaker_id = 'p' + str(speaker)
-            audio_dir = os.path.join(dataset_dir, 'wav48_silence_trimmed', speaker_id)
-            os.makedirs(audio_dir, exist_ok=True)
-
-            file_dir = os.path.join(dataset_dir, 'txt', speaker_id)
-            os.makedirs(file_dir, exist_ok=True)
-
-            for utterance_id in range(1, 11):
-                filename = f'{speaker_id}_{utterance_id:03d}_mic2'
-                audio_file_path = os.path.join(audio_dir, filename + '.wav')
-
-                data = get_whitenoise(
-                    sample_rate=sample_rate,
-                    duration=0.01,
-                    n_channels=1,
-                    dtype='float32',
-                    seed=seed
-                )
-                save_wav(audio_file_path, data, sample_rate)
-
-                txt_file_path = os.path.join(file_dir, filename[:-5] + '.txt')
-                utterance = UTTERANCE[utterance_id - 1]
-                with open(txt_file_path, 'w') as f:
-                    f.write(utterance)
-
-                sample = (
-                    normalize_wav(data),
-                    sample_rate,
-                    utterance,
-                    speaker_id,
-                    utterance_id
-                )
-                cls.samples.append(sample)
-
-                seed += 1
+        cls.samples = get_mock_dataset(cls.root_dir)

    def _test_vctk(self, dataset):
        num_samples = 0