Commit 9bbd4600, authored and committed by hwangjeff on Oct 08, 2021

Rename utterance to transcript in datasets (#1841)

Parent: 94027791
Showing 6 changed files with 30 additions and 30 deletions:

test/torchaudio_unittest/datasets/cmuarctic_test.py    +5 -5
test/torchaudio_unittest/datasets/librispeech_test.py  +6 -6
test/torchaudio_unittest/datasets/vctk_test.py         +7 -7
torchaudio/datasets/cmuarctic.py                       +5 -5
torchaudio/datasets/librispeech.py                     +3 -3
torchaudio/datasets/vctk.py                            +4 -4
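The rename is name-only: each dataset keeps its tuple layout and still returns the transcript text in the same position, so positional unpacking is unaffected; only variable names, test names, and docstrings now say transcript instead of utterance. A minimal usage sketch with the new naming, using CMUARCTIC as an example (the "./data" root below is a placeholder, not part of this commit):

from torchaudio.datasets import CMUARCTIC

# Placeholder root directory; download=True fetches the default subset
# if it is not already present.
dataset = CMUARCTIC("./data", download=True)

# Same 4-element tuple as before this commit; only the name of the third
# field changes from utterance to transcript in code and docstrings.
waveform, sample_rate, transcript, utterance_id = dataset[0]
print(sample_rate, utterance_id)
print(transcript)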
test/torchaudio_unittest/datasets/cmuarctic_test.py

@@ -18,7 +18,7 @@ def get_mock_dataset(root_dir):
     """
     mocked_data = []
     sample_rate = 16000
-    utterance = "This is a test utterance."
+    transcript = "This is a test transcript."
     base_dir = os.path.join(root_dir, "ARCTIC", "cmu_us_aew_arctic")
     txt_dir = os.path.join(base_dir, "etc")
@@ -44,11 +44,11 @@ def get_mock_dataset(root_dir):
             sample = (
                 normalize_wav(data),
                 sample_rate,
-                utterance,
+                transcript,
                 utterance_id.split("_")[1],
             )
             mocked_data.append(sample)
-            txt.write(f'( {utterance_id} "{utterance}" )\n')
+            txt.write(f'( {utterance_id} "{transcript}" )\n')
             seed += 1
     return mocked_data
@@ -66,10 +66,10 @@ class TestCMUARCTIC(TempDirMixin, TorchaudioTestCase):
     def _test_cmuarctic(self, dataset):
         n_ite = 0
-        for i, (waveform, sample_rate, utterance, utterance_id) in enumerate(dataset):
+        for i, (waveform, sample_rate, transcript, utterance_id) in enumerate(dataset):
             expected_sample = self.samples[i]
             assert sample_rate == expected_sample[1]
-            assert utterance == expected_sample[2]
+            assert transcript == expected_sample[2]
             assert utterance_id == expected_sample[3]
             self.assertEqual(expected_sample[0], waveform, atol=5e-5, rtol=1e-8)
             n_ite += 1
test/torchaudio_unittest/datasets/librispeech_test.py

@@ -11,7 +11,7 @@ from torchaudio_unittest.common_utils import (
 from torchaudio.datasets import librispeech

-# Used to generate a unique utterance for each dummy audio file
+# Used to generate a unique transcript for each dummy audio file
 _NUMBERS = [
     'ZERO',
     'ONE',
@@ -51,11 +51,11 @@ def get_mock_dataset(root_dir):
                 filename = f'{speaker_id}-{chapter_id}-{utterance_id:04d}.wav'
                 path = os.path.join(chapter_path, filename)
-                utterance = ' '.join(
+                transcript = ' '.join(
                     [_NUMBERS[x] for x in [speaker_id, chapter_id, utterance_id]])
                 trans_content.append(
-                    f'{speaker_id}-{chapter_id}-{utterance_id:04d} {utterance}'
+                    f'{speaker_id}-{chapter_id}-{utterance_id:04d} {transcript}'
                 )

                 data = get_whitenoise(
@@ -69,7 +69,7 @@ def get_mock_dataset(root_dir):
                 sample = (
                     normalize_wav(data),
                     sample_rate,
-                    utterance,
+                    transcript,
                     speaker_id,
                     chapter_id,
                     utterance_id
@@ -104,11 +104,11 @@ class TestLibriSpeech(TempDirMixin, TorchaudioTestCase):
     def _test_librispeech(self, dataset):
         num_samples = 0
         for i, (
-            data, sample_rate, utterance, speaker_id, chapter_id, utterance_id
+            data, sample_rate, transcript, speaker_id, chapter_id, utterance_id
         ) in enumerate(dataset):
             self.assertEqual(data, self.samples[i][0], atol=5e-5, rtol=1e-8)
             assert sample_rate == self.samples[i][1]
-            assert utterance == self.samples[i][2]
+            assert transcript == self.samples[i][2]
             assert speaker_id == self.samples[i][3]
             assert chapter_id == self.samples[i][4]
             assert utterance_id == self.samples[i][5]
test/torchaudio_unittest/datasets/vctk_test.py

@@ -11,8 +11,8 @@ from torchaudio_unittest.common_utils import (
     normalize_wav,
 )

-# Used to generate a unique utterance for each dummy audio file
-_UTTERANCE = [
+# Used to generate a unique transcript for each dummy audio file
+_TRANSCRIPT = [
     'Please call Stella',
     'Ask her to bring these things',
     'with her from the store',
@@ -59,14 +59,14 @@ def get_mock_dataset(root_dir):
             save_wav(audio_file_path, data, sample_rate)
             txt_file_path = os.path.join(file_dir, filename[:-5] + '.txt')
-            utterance = _UTTERANCE[utterance_id - 1]
+            transcript = _TRANSCRIPT[utterance_id - 1]
             with open(txt_file_path, 'w') as f:
-                f.write(utterance)
+                f.write(transcript)

             sample = (
                 normalize_wav(data),
                 sample_rate,
-                utterance,
+                transcript,
                 speaker_id,
                 utterance_id
             )
@@ -88,10 +88,10 @@ class TestVCTK(TempDirMixin, TorchaudioTestCase):
     def _test_vctk(self, dataset):
         num_samples = 0
-        for i, (data, sample_rate, utterance, speaker_id, utterance_id) in enumerate(dataset):
+        for i, (data, sample_rate, transcript, speaker_id, utterance_id) in enumerate(dataset):
             self.assertEqual(data, self.samples[i][0], atol=5e-5, rtol=1e-8)
             assert sample_rate == self.samples[i][1]
-            assert utterance == self.samples[i][2]
+            assert transcript == self.samples[i][2]
             assert speaker_id == self.samples[i][3]
             assert int(utterance_id) == self.samples[i][4]
             num_samples += 1
torchaudio/datasets/cmuarctic.py

@@ -58,10 +58,10 @@ def load_cmuarctic_item(line: str,
                         folder_audio: str,
                         ext_audio: str) -> Tuple[Tensor, int, str, str]:

-    utterance_id, utterance = line[0].strip().split(" ", 2)[1:]
+    utterance_id, transcript = line[0].strip().split(" ", 2)[1:]

-    # Remove space, double quote, and single parenthesis from utterance
-    utterance = utterance[1:-3]
+    # Remove space, double quote, and single parenthesis from transcript
+    transcript = transcript[1:-3]

     file_audio = os.path.join(path, folder_audio, utterance_id + ext_audio)
@@ -71,7 +71,7 @@ def load_cmuarctic_item(line: str,
     return (
         waveform,
         sample_rate,
-        utterance,
+        transcript,
         utterance_id.split("_")[1]
     )
@@ -164,7 +164,7 @@ class CMUARCTIC(Dataset):
             n (int): The index of the sample to be loaded

         Returns:
-            tuple: ``(waveform, sample_rate, utterance, utterance_id)``
+            tuple: ``(waveform, sample_rate, transcript, utterance_id)``
         """
         line = self._walker[n]
         return load_cmuarctic_item(line, self._path, self._folder_audio, self._ext_audio)
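For context on the [1:-3] slice in load_cmuarctic_item: each entry of the CMU ARCTIC prompt index has the shape ( utterance_id "transcript text" ), so after split(" ", 2)[1:] the transcript string still carries a leading double quote and a trailing quote-space-parenthesis, which the slice removes. A self-contained sketch of that step on an illustrative line (a plain string stands in here for the line[0] row element the dataset code receives):

# Illustrative prompt line in the txt.done.data style parsed above.
line = '( arctic_a0001 "Author of the danger trail, Philip Steels, etc." )'

utterance_id, transcript = line.strip().split(" ", 2)[1:]
# transcript == '"Author of the danger trail, Philip Steels, etc." )'
transcript = transcript[1:-3]  # drop the leading quote and the trailing '" )'

assert utterance_id == "arctic_a0001"
assert transcript == "Author of the danger trail, Philip Steels, etc."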
torchaudio/datasets/librispeech.py

@@ -49,7 +49,7 @@ def load_librispeech_item(fileid: str,
     # Load text
     with open(file_text) as ft:
         for line in ft:
-            fileid_text, utterance = line.strip().split(" ", 1)
+            fileid_text, transcript = line.strip().split(" ", 1)
             if fileid_audio == fileid_text:
                 break
         else:
@@ -59,7 +59,7 @@ def load_librispeech_item(fileid: str,
     return (
         waveform,
         sample_rate,
-        utterance,
+        transcript,
         int(speaker_id),
         int(chapter_id),
         int(utterance_id),
@@ -133,7 +133,7 @@ class LIBRISPEECH(Dataset):
             n (int): The index of the sample to be loaded

         Returns:
-            tuple: ``(waveform, sample_rate, utterance, speaker_id, chapter_id, utterance_id)``
+            tuple: ``(waveform, sample_rate, transcript, speaker_id, chapter_id, utterance_id)``
         """
         fileid = self._walker[n]
         return load_librispeech_item(fileid, self._path, self._ext_audio, self._ext_txt)
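The split(" ", 1) in load_librispeech_item relies on the LibriSpeech *.trans.txt layout, where each line is a file id followed by a single space and the transcript; the for/else loop keeps the transcript whose id matches the audio file and fails if none matches. A rough sketch of that lookup against example lines (the ids, text, and error handling are illustrative, not taken from the library):

# Example lines in the *.trans.txt format; contents are illustrative.
trans_lines = [
    "84-121123-0000 GO DO YOU HEAR",
    "84-121123-0001 BUT IN LESS THAN FIVE MINUTES THE STAIRCASE GROANED",
]
fileid_audio = "84-121123-0001"

for line in trans_lines:
    # Only the first space separates the id from the transcript text.
    fileid_text, transcript = line.strip().split(" ", 1)
    if fileid_audio == fileid_text:
        break
else:
    # Mirrors the dataset's behavior of failing when no id matches;
    # the exact exception type and message here are an assumption.
    raise FileNotFoundError("Transcript not found for " + fileid_audio)

assert transcript.startswith("BUT IN LESS THAN FIVE MINUTES")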
torchaudio/datasets/vctk.py

@@ -241,7 +241,7 @@ class VCTK_092(Dataset):
         return torchaudio.load(file_path)

     def _load_sample(self, speaker_id: str, utterance_id: str, mic_id: str) -> SampleType:
-        utterance_path = os.path.join(
+        transcript_path = os.path.join(
             self._txt_dir, speaker_id, f"{speaker_id}_{utterance_id}.txt"
         )
         audio_path = os.path.join(
@@ -251,12 +251,12 @@ class VCTK_092(Dataset):
         )

         # Reading text
-        utterance = self._load_text(utterance_path)
+        transcript = self._load_text(transcript_path)

         # Reading FLAC
         waveform, sample_rate = self._load_audio(audio_path)

-        return (waveform, sample_rate, utterance, speaker_id, utterance_id)
+        return (waveform, sample_rate, transcript, speaker_id, utterance_id)

     def __getitem__(self, n: int) -> SampleType:
         """Load the n-th sample from the dataset.
@@ -265,7 +265,7 @@ class VCTK_092(Dataset):
             n (int): The index of the sample to be loaded

         Returns:
-            tuple: ``(waveform, sample_rate, utterance, speaker_id, utterance_id)``
+            tuple: ``(waveform, sample_rate, transcript, speaker_id, utterance_id)``
         """
         speaker_id, utterance_id = self._sample_ids[n]
         return self._load_sample(speaker_id, utterance_id, self._mic_id)
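After this change VCTK_092 documents its SampleType as (waveform, sample_rate, transcript, speaker_id, utterance_id). A minimal sketch of consuming it with the new field name, assuming the corpus already sits under a local "./data" root (the path is a placeholder):

from torchaudio.datasets import VCTK_092

# Placeholder root; pass download=True instead if the corpus is not present locally.
dataset = VCTK_092("./data", download=False)

# Tuple positions are unchanged by this commit; only the third field's name is.
waveform, sample_rate, transcript, speaker_id, utterance_id = dataset[0]
print(speaker_id, utterance_id, sample_rate)
print(transcript)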