Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
cf114276
Unverified
Commit
cf114276
authored
Dec 30, 2020
by
Aziz
Committed by
GitHub
Dec 30, 2020
Browse files
Refactor librispeech unittest (#1140)
parent
5bf6b146
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
64 additions
and
55 deletions
+64
-55
test/torchaudio_unittest/datasets/librispeech_test.py
test/torchaudio_unittest/datasets/librispeech_test.py
+64
-55
No files found.
test/torchaudio_unittest/datasets/librispeech_test.py
View file @
cf114276
import
os
from
pathlib
import
Path
from
torchaudio.datasets
import
librispeech
from
torchaudio_unittest.common_utils
import
(
TempDirMixin
,
TorchaudioTestCase
,
...
...
@@ -11,8 +9,10 @@ from torchaudio_unittest.common_utils import (
normalize_wav
,
)
from
torchaudio.datasets
import
librispeech
# Used to generate a unique utterance for each dummy audio file
NUMBERS
=
[
_
NUMBERS
=
[
'ZERO'
,
'ONE'
,
'TWO'
,
...
...
@@ -26,6 +26,65 @@ NUMBERS = [
]
def
get_mock_dataset
(
root_dir
):
"""
root_dir: directory to the mocked dataset
"""
mocked_data
=
[]
dataset_dir
=
os
.
path
.
join
(
root_dir
,
librispeech
.
FOLDER_IN_ARCHIVE
,
librispeech
.
URL
)
os
.
makedirs
(
dataset_dir
,
exist_ok
=
True
)
sample_rate
=
16000
# 16kHz
seed
=
0
for
speaker_id
in
range
(
5
):
speaker_path
=
os
.
path
.
join
(
dataset_dir
,
str
(
speaker_id
))
os
.
makedirs
(
speaker_path
,
exist_ok
=
True
)
for
chapter_id
in
range
(
3
):
chapter_path
=
os
.
path
.
join
(
speaker_path
,
str
(
chapter_id
))
os
.
makedirs
(
chapter_path
,
exist_ok
=
True
)
trans_content
=
[]
for
utterance_id
in
range
(
10
):
filename
=
f
'
{
speaker_id
}
-
{
chapter_id
}
-
{
utterance_id
:
04
d
}
.wav'
path
=
os
.
path
.
join
(
chapter_path
,
filename
)
utterance
=
' '
.
join
(
[
_NUMBERS
[
x
]
for
x
in
[
speaker_id
,
chapter_id
,
utterance_id
]]
)
trans_content
.
append
(
f
'
{
speaker_id
}
-
{
chapter_id
}
-
{
utterance_id
:
04
d
}
{
utterance
}
'
)
data
=
get_whitenoise
(
sample_rate
=
sample_rate
,
duration
=
0.01
,
n_channels
=
1
,
dtype
=
'float32'
,
seed
=
seed
)
save_wav
(
path
,
data
,
sample_rate
)
sample
=
(
normalize_wav
(
data
),
sample_rate
,
utterance
,
speaker_id
,
chapter_id
,
utterance_id
)
mocked_data
.
append
(
sample
)
seed
+=
1
trans_filename
=
f
'
{
speaker_id
}
-
{
chapter_id
}
.trans.txt'
trans_path
=
os
.
path
.
join
(
chapter_path
,
trans_filename
)
with
open
(
trans_path
,
'w'
)
as
f
:
f
.
write
(
'
\n
'
.
join
(
trans_content
))
return
mocked_data
class
TestLibriSpeech
(
TempDirMixin
,
TorchaudioTestCase
):
backend
=
'default'
...
...
@@ -35,57 +94,7 @@ class TestLibriSpeech(TempDirMixin, TorchaudioTestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
root_dir
=
cls
.
get_base_temp_dir
()
dataset_dir
=
os
.
path
.
join
(
cls
.
root_dir
,
librispeech
.
FOLDER_IN_ARCHIVE
,
librispeech
.
URL
)
os
.
makedirs
(
dataset_dir
,
exist_ok
=
True
)
sample_rate
=
16000
# 16kHz
seed
=
0
for
speaker_id
in
range
(
5
):
speaker_path
=
os
.
path
.
join
(
dataset_dir
,
str
(
speaker_id
))
os
.
makedirs
(
speaker_path
,
exist_ok
=
True
)
for
chapter_id
in
range
(
3
):
chapter_path
=
os
.
path
.
join
(
speaker_path
,
str
(
chapter_id
))
os
.
makedirs
(
chapter_path
,
exist_ok
=
True
)
trans_content
=
[]
for
utterance_id
in
range
(
10
):
filename
=
f
'
{
speaker_id
}
-
{
chapter_id
}
-
{
utterance_id
:
04
d
}
.wav'
path
=
os
.
path
.
join
(
chapter_path
,
filename
)
utterance
=
' '
.
join
(
[
NUMBERS
[
x
]
for
x
in
[
speaker_id
,
chapter_id
,
utterance_id
]]
)
trans_content
.
append
(
f
'
{
speaker_id
}
-
{
chapter_id
}
-
{
utterance_id
:
04
d
}
{
utterance
}
'
)
data
=
get_whitenoise
(
sample_rate
=
sample_rate
,
duration
=
0.01
,
n_channels
=
1
,
dtype
=
'float32'
,
seed
=
seed
)
save_wav
(
path
,
data
,
sample_rate
)
sample
=
(
normalize_wav
(
data
),
sample_rate
,
utterance
,
speaker_id
,
chapter_id
,
utterance_id
)
cls
.
samples
.
append
(
sample
)
seed
+=
1
trans_filename
=
f
'
{
speaker_id
}
-
{
chapter_id
}
.trans.txt'
trans_path
=
os
.
path
.
join
(
chapter_path
,
trans_filename
)
with
open
(
trans_path
,
'w'
)
as
f
:
f
.
write
(
'
\n
'
.
join
(
trans_content
))
cls
.
samples
=
get_mock_dataset
(
cls
.
root_dir
)
@
classmethod
def
tearDownClass
(
cls
):
...
...
@@ -95,7 +104,7 @@ class TestLibriSpeech(TempDirMixin, TorchaudioTestCase):
def
_test_librispeech
(
self
,
dataset
):
num_samples
=
0
for
i
,
(
data
,
sample_rate
,
utterance
,
speaker_id
,
chapter_id
,
utterance_id
data
,
sample_rate
,
utterance
,
speaker_id
,
chapter_id
,
utterance_id
)
in
enumerate
(
dataset
):
self
.
assertEqual
(
data
,
self
.
samples
[
i
][
0
],
atol
=
5e-5
,
rtol
=
1e-8
)
assert
sample_rate
==
self
.
samples
[
i
][
1
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment