Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
1def3fa9
Unverified
Commit
1def3fa9
authored
Jul 23, 2020
by
moto
Committed by
GitHub
Jul 23, 2020
Browse files
Make walk_files traverse in alphabetical and breadth-first order (#814)
parent
68f6a6a0
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
62 additions
and
8 deletions
+62
-8
test/datasets/libritts_test.py
test/datasets/libritts_test.py
+4
-4
test/datasets/utils_test.py
test/datasets/utils_test.py
+47
-0
test/datasets/yesno_test.py
test/datasets/yesno_test.py
+4
-3
torchaudio/datasets/utils.py
torchaudio/datasets/utils.py
+7
-1
No files found.
test/datasets/libritts_test.py
View file @
1def3fa9
...
@@ -49,16 +49,14 @@ class TestLibriTTS(TempDirMixin, TorchaudioTestCase):
...
@@ -49,16 +49,14 @@ class TestLibriTTS(TempDirMixin, TorchaudioTestCase):
def
test_libritts
(
self
):
def
test_libritts
(
self
):
dataset
=
LIBRITTS
(
self
.
root_dir
)
dataset
=
LIBRITTS
(
self
.
root_dir
)
samples
=
list
(
dataset
)
n_ites
=
0
samples
.
sort
(
key
=
lambda
s
:
s
[
4
])
for
i
,
(
waveform
,
for
i
,
(
waveform
,
sample_rate
,
sample_rate
,
original_text
,
original_text
,
normalized_text
,
normalized_text
,
speaker_id
,
speaker_id
,
chapter_id
,
chapter_id
,
utterance_id
)
in
enumerate
(
samples
):
utterance_id
)
in
enumerate
(
dataset
):
expected_ids
=
self
.
utterance_ids
[
i
]
expected_ids
=
self
.
utterance_ids
[
i
]
expected_data
=
self
.
data
[
i
]
expected_data
=
self
.
data
[
i
]
...
@@ -69,3 +67,5 @@ class TestLibriTTS(TempDirMixin, TorchaudioTestCase):
...
@@ -69,3 +67,5 @@ class TestLibriTTS(TempDirMixin, TorchaudioTestCase):
assert
original_text
==
self
.
original_text
assert
original_text
==
self
.
original_text
assert
normalized_text
==
self
.
normalized_text
assert
normalized_text
==
self
.
normalized_text
assert
utterance_id
==
f
'
{
"_"
.
join
(
str
(
u
)
for
u
in
expected_ids
[
-
4
:])
}
'
assert
utterance_id
==
f
'
{
"_"
.
join
(
str
(
u
)
for
u
in
expected_ids
[
-
4
:])
}
'
n_ites
+=
1
assert
n_ites
==
len
(
self
.
utterance_ids
)
test/datasets/utils_test.py
0 → 100644
View file @
1def3fa9
import
os
from
pathlib
import
Path
from
torchaudio.datasets
import
utils
as
dataset_utils
from
..common_utils
import
(
TempDirMixin
,
TorchaudioTestCase
,
)
class
TestWalkFiles
(
TempDirMixin
,
TorchaudioTestCase
):
root
=
None
expected
=
None
def
_add_file
(
self
,
*
parts
):
path
=
self
.
get_temp_path
(
*
parts
)
self
.
expected
.
append
(
path
)
Path
(
path
).
touch
()
def
setUp
(
self
):
self
.
root
=
self
.
get_temp_path
()
self
.
expected
=
[]
# level 1
for
filename
in
[
'a.txt'
,
'b.txt'
,
'c.txt'
]:
self
.
_add_file
(
filename
)
# level 2
for
dir1
in
[
'd1'
,
'd2'
,
'd3'
]:
for
filename
in
[
'd.txt'
,
'e.txt'
,
'f.txt'
]:
self
.
_add_file
(
dir1
,
filename
)
# level 3
for
dir2
in
[
'd1'
,
'd2'
,
'd3'
]:
for
filename
in
[
'g.txt'
,
'h.txt'
,
'i.txt'
]:
self
.
_add_file
(
dir1
,
dir2
,
filename
)
print
(
'
\n
'
.
join
(
self
.
expected
))
def
test_walk_files
(
self
):
"""walk_files should traverse files in alphabetical order"""
n_ites
=
0
for
i
,
path
in
enumerate
(
dataset_utils
.
walk_files
(
self
.
root
,
'.txt'
,
prefix
=
True
)):
found
=
os
.
path
.
join
(
self
.
root
,
path
)
assert
found
==
self
.
expected
[
i
]
n_ites
+=
1
assert
n_ites
==
len
(
self
.
expected
)
test/datasets/yesno_test.py
View file @
1def3fa9
...
@@ -38,11 +38,12 @@ class TestYesNo(TempDirMixin, TorchaudioTestCase):
...
@@ -38,11 +38,12 @@ class TestYesNo(TempDirMixin, TorchaudioTestCase):
def
test_yesno
(
self
):
def
test_yesno
(
self
):
dataset
=
yesno
.
YESNO
(
self
.
root_dir
)
dataset
=
yesno
.
YESNO
(
self
.
root_dir
)
samples
=
list
(
dataset
)
n_ite
=
0
samples
.
sort
(
key
=
lambda
s
:
s
[
2
])
for
i
,
(
waveform
,
sample_rate
,
label
)
in
enumerate
(
dataset
):
for
i
,
(
waveform
,
sample_rate
,
label
)
in
enumerate
(
samples
):
expected_label
=
self
.
labels
[
i
]
expected_label
=
self
.
labels
[
i
]
expected_data
=
self
.
data
[
i
]
expected_data
=
self
.
data
[
i
]
self
.
assertEqual
(
expected_data
,
waveform
,
atol
=
5e-5
,
rtol
=
1e-8
)
self
.
assertEqual
(
expected_data
,
waveform
,
atol
=
5e-5
,
rtol
=
1e-8
)
assert
sample_rate
==
8000
assert
sample_rate
==
8000
assert
label
==
expected_label
assert
label
==
expected_label
n_ite
+=
1
assert
n_ite
==
len
(
self
.
data
)
torchaudio/datasets/utils.py
View file @
1def3fa9
...
@@ -264,7 +264,13 @@ def walk_files(root: str,
...
@@ -264,7 +264,13 @@ def walk_files(root: str,
root
=
os
.
path
.
expanduser
(
root
)
root
=
os
.
path
.
expanduser
(
root
)
for
dirpath
,
_
,
files
in
os
.
walk
(
root
):
for
dirpath
,
dirs
,
files
in
os
.
walk
(
root
):
dirs
.
sort
()
# `dirs` is the list used in os.walk function and by sorting it in-place here, we change the
# behavior of os.walk to traverse sub directory alphabetically
# see also
# https://stackoverflow.com/questions/6670029/can-i-force-python3s-os-walk-to-visit-directories-in-alphabetical-order-how#comment71993866_6670926
files
.
sort
()
for
f
in
files
:
for
f
in
files
:
if
f
.
endswith
(
suffix
):
if
f
.
endswith
(
suffix
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment