Commit c38ecd2e (unverified), authored by hwangjeff, committed by GitHub
Browse files

Rename utterance to transcript in datasets (#1841)

parent 976f56e8
......@@ -18,7 +18,7 @@ def get_mock_dataset(root_dir):
"""
mocked_data = []
sample_rate = 16000
utterance = "This is a test utterance."
transcript = "This is a test transcript."
base_dir = os.path.join(root_dir, "ARCTIC", "cmu_us_aew_arctic")
txt_dir = os.path.join(base_dir, "etc")
......@@ -44,11 +44,11 @@ def get_mock_dataset(root_dir):
sample = (
normalize_wav(data),
sample_rate,
utterance,
transcript,
utterance_id.split("_")[1],
)
mocked_data.append(sample)
txt.write(f'( {utterance_id} "{utterance}" )\n')
txt.write(f'( {utterance_id} "{transcript}" )\n')
seed += 1
return mocked_data
......@@ -66,10 +66,10 @@ class TestCMUARCTIC(TempDirMixin, TorchaudioTestCase):
def _test_cmuarctic(self, dataset):
n_ite = 0
for i, (waveform, sample_rate, utterance, utterance_id) in enumerate(dataset):
for i, (waveform, sample_rate, transcript, utterance_id) in enumerate(dataset):
expected_sample = self.samples[i]
assert sample_rate == expected_sample[1]
assert utterance == expected_sample[2]
assert transcript == expected_sample[2]
assert utterance_id == expected_sample[3]
self.assertEqual(expected_sample[0], waveform, atol=5e-5, rtol=1e-8)
n_ite += 1
......
......@@ -11,7 +11,7 @@ from torchaudio_unittest.common_utils import (
from torchaudio.datasets import librispeech
# Used to generate a unique utterance for each dummy audio file
# Used to generate a unique transcript for each dummy audio file
_NUMBERS = [
'ZERO',
'ONE',
......@@ -51,11 +51,11 @@ def get_mock_dataset(root_dir):
filename = f'{speaker_id}-{chapter_id}-{utterance_id:04d}.wav'
path = os.path.join(chapter_path, filename)
utterance = ' '.join(
transcript = ' '.join(
[_NUMBERS[x] for x in [speaker_id, chapter_id, utterance_id]]
)
trans_content.append(
f'{speaker_id}-{chapter_id}-{utterance_id:04d} {utterance}'
f'{speaker_id}-{chapter_id}-{utterance_id:04d} {transcript}'
)
data = get_whitenoise(
......@@ -69,7 +69,7 @@ def get_mock_dataset(root_dir):
sample = (
normalize_wav(data),
sample_rate,
utterance,
transcript,
speaker_id,
chapter_id,
utterance_id
......@@ -104,11 +104,11 @@ class TestLibriSpeech(TempDirMixin, TorchaudioTestCase):
def _test_librispeech(self, dataset):
num_samples = 0
for i, (
data, sample_rate, utterance, speaker_id, chapter_id, utterance_id
data, sample_rate, transcript, speaker_id, chapter_id, utterance_id
) in enumerate(dataset):
self.assertEqual(data, self.samples[i][0], atol=5e-5, rtol=1e-8)
assert sample_rate == self.samples[i][1]
assert utterance == self.samples[i][2]
assert transcript == self.samples[i][2]
assert speaker_id == self.samples[i][3]
assert chapter_id == self.samples[i][4]
assert utterance_id == self.samples[i][5]
......
......@@ -11,8 +11,8 @@ from torchaudio_unittest.common_utils import (
normalize_wav,
)
# Used to generate a unique utterance for each dummy audio file
_UTTERANCE = [
# Used to generate a unique transcript for each dummy audio file
_TRANSCRIPT = [
'Please call Stella',
'Ask her to bring these things',
'with her from the store',
......@@ -59,14 +59,14 @@ def get_mock_dataset(root_dir):
save_wav(audio_file_path, data, sample_rate)
txt_file_path = os.path.join(file_dir, filename[:-5] + '.txt')
utterance = _UTTERANCE[utterance_id - 1]
transcript = _TRANSCRIPT[utterance_id - 1]
with open(txt_file_path, 'w') as f:
f.write(utterance)
f.write(transcript)
sample = (
normalize_wav(data),
sample_rate,
utterance,
transcript,
speaker_id,
utterance_id
)
......@@ -88,10 +88,10 @@ class TestVCTK(TempDirMixin, TorchaudioTestCase):
def _test_vctk(self, dataset):
num_samples = 0
for i, (data, sample_rate, utterance, speaker_id, utterance_id) in enumerate(dataset):
for i, (data, sample_rate, transcript, speaker_id, utterance_id) in enumerate(dataset):
self.assertEqual(data, self.samples[i][0], atol=5e-5, rtol=1e-8)
assert sample_rate == self.samples[i][1]
assert utterance == self.samples[i][2]
assert transcript == self.samples[i][2]
assert speaker_id == self.samples[i][3]
assert int(utterance_id) == self.samples[i][4]
num_samples += 1
......
......@@ -58,10 +58,10 @@ def load_cmuarctic_item(line: str,
folder_audio: str,
ext_audio: str) -> Tuple[Tensor, int, str, str]:
utterance_id, utterance = line[0].strip().split(" ", 2)[1:]
utterance_id, transcript = line[0].strip().split(" ", 2)[1:]
# Remove space, double quote, and single parenthesis from utterance
utterance = utterance[1:-3]
# Remove space, double quote, and single parenthesis from transcript
transcript = transcript[1:-3]
file_audio = os.path.join(path, folder_audio, utterance_id + ext_audio)
......@@ -71,7 +71,7 @@ def load_cmuarctic_item(line: str,
return (
waveform,
sample_rate,
utterance,
transcript,
utterance_id.split("_")[1]
)
......@@ -164,7 +164,7 @@ class CMUARCTIC(Dataset):
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, utterance_id)``
tuple: ``(waveform, sample_rate, transcript, utterance_id)``
"""
line = self._walker[n]
return load_cmuarctic_item(line, self._path, self._folder_audio, self._ext_audio)
......
......@@ -49,7 +49,7 @@ def load_librispeech_item(fileid: str,
# Load text
with open(file_text) as ft:
for line in ft:
fileid_text, utterance = line.strip().split(" ", 1)
fileid_text, transcript = line.strip().split(" ", 1)
if fileid_audio == fileid_text:
break
else:
......@@ -59,7 +59,7 @@ def load_librispeech_item(fileid: str,
return (
waveform,
sample_rate,
utterance,
transcript,
int(speaker_id),
int(chapter_id),
int(utterance_id),
......@@ -133,7 +133,7 @@ class LIBRISPEECH(Dataset):
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, speaker_id, chapter_id, utterance_id)``
tuple: ``(waveform, sample_rate, transcript, speaker_id, chapter_id, utterance_id)``
"""
fileid = self._walker[n]
return load_librispeech_item(fileid, self._path, self._ext_audio, self._ext_txt)
......
......@@ -110,7 +110,7 @@ class VCTK_092(Dataset):
return torchaudio.load(file_path)
def _load_sample(self, speaker_id: str, utterance_id: str, mic_id: str) -> SampleType:
utterance_path = os.path.join(
transcript_path = os.path.join(
self._txt_dir, speaker_id, f"{speaker_id}_{utterance_id}.txt"
)
audio_path = os.path.join(
......@@ -120,12 +120,12 @@ class VCTK_092(Dataset):
)
# Reading text
utterance = self._load_text(utterance_path)
transcript = self._load_text(transcript_path)
# Reading FLAC
waveform, sample_rate = self._load_audio(audio_path)
return (waveform, sample_rate, utterance, speaker_id, utterance_id)
return (waveform, sample_rate, transcript, speaker_id, utterance_id)
def __getitem__(self, n: int) -> SampleType:
"""Load the n-th sample from the dataset.
......@@ -134,7 +134,7 @@ class VCTK_092(Dataset):
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, speaker_id, utterance_id)``
tuple: ``(waveform, sample_rate, transcript, speaker_id, utterance_id)``
"""
speaker_id, utterance_id = self._sample_ids[n]
return self._load_sample(speaker_id, utterance_id, self._mic_id)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment