Unverified commit cf114276, authored by Aziz, committed by GitHub
Browse files

Refactor librispeech unittest (#1140)

parent 5bf6b146
import os import os
from pathlib import Path from pathlib import Path
from torchaudio.datasets import librispeech
from torchaudio_unittest.common_utils import ( from torchaudio_unittest.common_utils import (
TempDirMixin, TempDirMixin,
TorchaudioTestCase, TorchaudioTestCase,
...@@ -11,8 +9,10 @@ from torchaudio_unittest.common_utils import ( ...@@ -11,8 +9,10 @@ from torchaudio_unittest.common_utils import (
normalize_wav, normalize_wav,
) )
from torchaudio.datasets import librispeech
# Used to generate a unique utterance for each dummy audio file # Used to generate a unique utterance for each dummy audio file
NUMBERS = [ _NUMBERS = [
'ZERO', 'ZERO',
'ONE', 'ONE',
'TWO', 'TWO',
...@@ -26,17 +26,13 @@ NUMBERS = [ ...@@ -26,17 +26,13 @@ NUMBERS = [
] ]
class TestLibriSpeech(TempDirMixin, TorchaudioTestCase): def get_mock_dataset(root_dir):
backend = 'default' """
root_dir: directory to the mocked dataset
root_dir = None """
samples = [] mocked_data = []
@classmethod
def setUpClass(cls):
cls.root_dir = cls.get_base_temp_dir()
dataset_dir = os.path.join( dataset_dir = os.path.join(
cls.root_dir, librispeech.FOLDER_IN_ARCHIVE, librispeech.URL root_dir, librispeech.FOLDER_IN_ARCHIVE, librispeech.URL
) )
os.makedirs(dataset_dir, exist_ok=True) os.makedirs(dataset_dir, exist_ok=True)
sample_rate = 16000 # 16kHz sample_rate = 16000 # 16kHz
...@@ -56,7 +52,7 @@ class TestLibriSpeech(TempDirMixin, TorchaudioTestCase): ...@@ -56,7 +52,7 @@ class TestLibriSpeech(TempDirMixin, TorchaudioTestCase):
path = os.path.join(chapter_path, filename) path = os.path.join(chapter_path, filename)
utterance = ' '.join( utterance = ' '.join(
[NUMBERS[x] for x in [speaker_id, chapter_id, utterance_id]] [_NUMBERS[x] for x in [speaker_id, chapter_id, utterance_id]]
) )
trans_content.append( trans_content.append(
f'{speaker_id}-{chapter_id}-{utterance_id:04d} {utterance}' f'{speaker_id}-{chapter_id}-{utterance_id:04d} {utterance}'
...@@ -78,7 +74,7 @@ class TestLibriSpeech(TempDirMixin, TorchaudioTestCase): ...@@ -78,7 +74,7 @@ class TestLibriSpeech(TempDirMixin, TorchaudioTestCase):
chapter_id, chapter_id,
utterance_id utterance_id
) )
cls.samples.append(sample) mocked_data.append(sample)
seed += 1 seed += 1
...@@ -86,6 +82,19 @@ class TestLibriSpeech(TempDirMixin, TorchaudioTestCase): ...@@ -86,6 +82,19 @@ class TestLibriSpeech(TempDirMixin, TorchaudioTestCase):
trans_path = os.path.join(chapter_path, trans_filename) trans_path = os.path.join(chapter_path, trans_filename)
with open(trans_path, 'w') as f: with open(trans_path, 'w') as f:
f.write('\n'.join(trans_content)) f.write('\n'.join(trans_content))
return mocked_data
class TestLibriSpeech(TempDirMixin, TorchaudioTestCase):
backend = 'default'
root_dir = None
samples = []
@classmethod
def setUpClass(cls):
cls.root_dir = cls.get_base_temp_dir()
cls.samples = get_mock_dataset(cls.root_dir)
@classmethod @classmethod
def tearDownClass(cls): def tearDownClass(cls):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment