Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
64956d54
Unverified
Commit
64956d54
authored
Jan 05, 2021
by
Aziz
Committed by
GitHub
Jan 05, 2021
Browse files
Refactor TEDLIUM unittest (#1135)
parent
8f02af5f
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
65 additions
and
59 deletions
+65
-59
test/torchaudio_unittest/datasets/tedlium_test.py
test/torchaudio_unittest/datasets/tedlium_test.py
+65
-59
No files found.
test/torchaudio_unittest/datasets/tedlium_test.py
View file @
64956d54
import
os
import
platform
import
unittest
from
pathlib
import
Path
from
torchaudio.datasets
import
tedlium
from
torchaudio_unittest.common_utils
import
(
TestBaseMixin
,
TempDirMixin
,
TorchaudioTestCase
,
get_whitenoise
,
save_wav
,
normalize_wav
,
)
from
torchaudio.datasets
import
tedlium
# Used to generate a unique utterance for each dummy audio file
UTTERANCES
=
[
_
UTTERANCES
=
[
"AaronHuey_2010X 1 AaronHuey_2010X 0.0 2.0 <o,f0,female> script1
\n
"
,
"AaronHuey_2010X 1 AaronHuey_2010X 2.0 4.0 <o,f0,female> script2
\n
"
,
"AaronHuey_2010X 1 AaronHuey_2010X 4.0 6.0 <o,f0,female> script3
\n
"
,
...
...
@@ -23,7 +20,7 @@ UTTERANCES = [
"AaronHuey_2010X 1 AaronHuey_2010X 8.0 10.0 <o,f0,female> script5
\n
"
,
]
PHONEME
=
[
_
PHONEME
=
[
"a AH"
,
"a(2) EY"
,
"aachen AA K AH N"
,
...
...
@@ -34,14 +31,11 @@ PHONEME = [
]
class
Tedlium
(
TempDirMixin
):
root_dir
=
None
samples
=
{}
@
classmethod
def
setUpClass
(
cls
):
cls
.
root_dir
=
cls
.
get_base_temp_dir
()
cls
.
root_dir
=
dataset_dir
=
os
.
path
.
join
(
cls
.
root_dir
,
"tedlium"
)
def
get_mock_dataset
(
dataset_dir
):
"""
dataset_dir: directory of the mocked dataset
"""
mocked_samples
=
{}
os
.
makedirs
(
dataset_dir
,
exist_ok
=
True
)
sample_rate
=
16000
# 16kHz
seed
=
0
...
...
@@ -70,16 +64,16 @@ class Tedlium(TempDirMixin):
trans_filename
=
f
"
{
release
}
.stm"
trans_path
=
os
.
path
.
join
(
os
.
path
.
join
(
release_dir
,
"stm"
),
trans_filename
)
with
open
(
trans_path
,
"w"
)
as
f
:
f
.
write
(
""
.
join
(
UTTERANCES
))
f
.
write
(
""
.
join
(
_
UTTERANCES
))
dict_filename
=
f
"
{
release
}
.dic"
dict_path
=
os
.
path
.
join
(
release_dir
,
dict_filename
)
with
open
(
dict_path
,
"w"
)
as
f
:
f
.
write
(
"
\n
"
.
join
(
PHONEME
))
f
.
write
(
"
\n
"
.
join
(
_
PHONEME
))
# Create a samples list to compare with
cls
.
samples
[
release
]
=
[]
for
utterance
in
UTTERANCES
:
mocked_
samples
[
release
]
=
[]
for
utterance
in
_
UTTERANCES
:
talk_id
,
_
,
speaker_id
,
start_time
,
end_time
,
identifier
,
transcript
=
utterance
.
split
(
" "
,
6
)
start_time
=
int
(
float
(
start_time
))
*
sample_rate
end_time
=
int
(
float
(
end_time
))
*
sample_rate
...
...
@@ -91,8 +85,20 @@ class Tedlium(TempDirMixin):
speaker_id
,
identifier
,
)
cls
.
samples
[
release
].
append
(
sample
)
mocked_
samples
[
release
].
append
(
sample
)
seed
+=
1
return
mocked_samples
class
Tedlium
(
TempDirMixin
):
root_dir
=
None
samples
=
{}
@
classmethod
def
setUpClass
(
cls
):
cls
.
root_dir
=
cls
.
get_base_temp_dir
()
cls
.
root_dir
=
dataset_dir
=
os
.
path
.
join
(
cls
.
root_dir
,
"tedlium"
)
cls
.
samples
=
get_mock_dataset
(
dataset_dir
)
def
_test_tedlium
(
self
,
dataset
,
release
):
num_samples
=
0
...
...
@@ -110,7 +116,7 @@ class Tedlium(TempDirMixin):
dataset
.
_dict_path
=
os
.
path
.
join
(
dataset
.
_path
,
f
"
{
release
}
.dic"
)
phoneme_dict
=
dataset
.
phoneme_dict
phoenemes
=
[
f
"
{
key
}
{
' '
.
join
(
value
)
}
"
for
key
,
value
in
phoneme_dict
.
items
()]
assert
phoenemes
==
PHONEME
assert
phoenemes
==
_
PHONEME
def
test_tedlium_release1_str
(
self
):
release
=
"release1"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment