Unverified commit 6edb3355, authored by Krishna Kalyan, committed by GitHub
Browse files

Refactor CMUARCTIC unittest (#1147)


Co-authored-by: krishnakalyan3 <skalyan@cloudera.com>
parent 02e4f6d2
......@@ -12,6 +12,47 @@ from torchaudio_unittest.common_utils import (
)
def get_mock_dataset(root_dir):
    """Build a mocked CMU ARCTIC speaker directory and return the expected samples.

    Under ``root_dir/ARCTIC/cmu_us_aew_arctic`` this creates a ``wav``
    directory holding one generated audio file per utterance id, and an
    ``etc/txt.done.data`` file with one transcript line per utterance id.

    Args:
        root_dir: directory in which the mocked dataset is created.

    Returns:
        A list with one tuple per written file, in write order:
        ``(normalize_wav(data), sample_rate, utterance, id_suffix)`` where
        ``id_suffix`` is the part of the utterance id after ``"arctic_"``
        (for example ``"a0000"``).
    """
    sample_rate = 16000
    utterance = "This is a test utterance."

    speaker_dir = os.path.join(root_dir, "ARCTIC", "cmu_us_aew_arctic")
    transcript_dir = os.path.join(speaker_dir, "etc")
    os.makedirs(transcript_dir, exist_ok=True)
    transcript_path = os.path.join(transcript_dir, "txt.done.data")
    wav_dir = os.path.join(speaker_dir, "wav")
    os.makedirs(wav_dir, exist_ok=True)

    # Ten ids in total: arctic_a0000..arctic_a0004, then arctic_b0000..arctic_b0004.
    utterance_ids = [f"arctic_{c}{i:04d}" for c in ["a", "b"] for i in range(5)]

    expected = []
    with open(transcript_path, "w") as transcript:
        # Each file gets its own seed (42, 43, ...) so the noise differs per file.
        for seed, utterance_id in enumerate(utterance_ids, start=42):
            wav_path = os.path.join(wav_dir, f"{utterance_id}.wav")
            waveform = get_whitenoise(
                sample_rate=sample_rate,
                duration=3,
                n_channels=1,
                dtype="int16",
                seed=seed,
            )
            save_wav(wav_path, waveform, sample_rate)
            expected.append(
                (
                    normalize_wav(waveform),
                    sample_rate,
                    utterance,
                    utterance_id.split("_")[1],
                )
            )
            transcript.write(f'( {utterance_id} "{utterance}" )\n')
    return expected
class TestCMUARCTIC(TempDirMixin, TorchaudioTestCase):
backend = "default"
......@@ -21,39 +62,7 @@ class TestCMUARCTIC(TempDirMixin, TorchaudioTestCase):
@classmethod
def setUpClass(cls):
    """Create the mocked dataset once for the whole test class.

    Stores the temporary base directory in ``cls.root_dir`` and the list of
    expected samples returned by ``get_mock_dataset`` in ``cls.samples``.
    """
    cls.root_dir = cls.get_base_temp_dir()
    # Generation lives entirely in get_mock_dataset. The inline copy that used
    # to precede this call wrote the same files a second time, and the samples
    # it appended were discarded when cls.samples was rebound here.
    cls.samples = get_mock_dataset(cls.root_dir)
def _test_cmuarctic(self, dataset):
n_ite = 0
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment