Unverified Commit 1def3fa9 authored by moto's avatar moto Committed by GitHub
Browse files

Make walk_files traverse in alphabetical and breadth-first order (#814)

parent 68f6a6a0
...@@ -49,16 +49,14 @@ class TestLibriTTS(TempDirMixin, TorchaudioTestCase): ...@@ -49,16 +49,14 @@ class TestLibriTTS(TempDirMixin, TorchaudioTestCase):
def test_libritts(self): def test_libritts(self):
dataset = LIBRITTS(self.root_dir) dataset = LIBRITTS(self.root_dir)
samples = list(dataset) n_ites = 0
samples.sort(key=lambda s: s[4])
for i, (waveform, for i, (waveform,
sample_rate, sample_rate,
original_text, original_text,
normalized_text, normalized_text,
speaker_id, speaker_id,
chapter_id, chapter_id,
utterance_id) in enumerate(samples): utterance_id) in enumerate(dataset):
expected_ids = self.utterance_ids[i] expected_ids = self.utterance_ids[i]
expected_data = self.data[i] expected_data = self.data[i]
...@@ -69,3 +67,5 @@ class TestLibriTTS(TempDirMixin, TorchaudioTestCase): ...@@ -69,3 +67,5 @@ class TestLibriTTS(TempDirMixin, TorchaudioTestCase):
assert original_text == self.original_text assert original_text == self.original_text
assert normalized_text == self.normalized_text assert normalized_text == self.normalized_text
assert utterance_id == f'{"_".join(str(u) for u in expected_ids[-4:])}' assert utterance_id == f'{"_".join(str(u) for u in expected_ids[-4:])}'
n_ites += 1
assert n_ites == len(self.utterance_ids)
import os
from pathlib import Path
from torchaudio.datasets import utils as dataset_utils
from ..common_utils import (
TempDirMixin,
TorchaudioTestCase,
)
class TestWalkFiles(TempDirMixin, TorchaudioTestCase):
    """Check the order in which ``dataset_utils.walk_files`` yields files.

    ``setUp`` creates a three-level tree of empty ``.txt`` files and records
    each path in ``self.expected`` in the order the traversal is expected to
    produce them.
    """

    root = None  # directory handed to walk_files; set in setUp
    expected = None  # list of file paths, in expected traversal order

    def _add_file(self, *parts):
        """Create an empty file at ``parts`` and append its path to ``self.expected``."""
        # presumably get_temp_path also creates the intermediate directories;
        # Path.touch would fail otherwise -- verify against TempDirMixin
        path = self.get_temp_path(*parts)
        self.expected.append(path)
        Path(path).touch()

    def setUp(self):
        """Build the fixture tree and the matching list of expected paths."""
        # Keep any fixture logic of the base classes in the chain.
        super().setUp()
        self.root = self.get_temp_path()
        self.expected = []

        # level 1
        for filename in ['a.txt', 'b.txt', 'c.txt']:
            self._add_file(filename)

        # level 2
        for dir1 in ['d1', 'd2', 'd3']:
            for filename in ['d.txt', 'e.txt', 'f.txt']:
                self._add_file(dir1, filename)
            # level 3
            # The files directly inside a directory are registered before the
            # files of its sub directories, and siblings are registered in
            # alphabetical order.
            for dir2 in ['d1', 'd2', 'd3']:
                for filename in ['g.txt', 'h.txt', 'i.txt']:
                    self._add_file(dir1, dir2, filename)

    def test_walk_files(self):
        """walk_files should traverse files in alphabetical order"""
        found = [
            os.path.join(self.root, path)
            for path in dataset_utils.walk_files(self.root, '.txt', prefix=True)
        ]
        # A single list comparison covers both the order and the number of
        # entries, and reports missing or surplus paths as an assertion failure.
        assert found == self.expected
...@@ -38,11 +38,12 @@ class TestYesNo(TempDirMixin, TorchaudioTestCase): ...@@ -38,11 +38,12 @@ class TestYesNo(TempDirMixin, TorchaudioTestCase):
def test_yesno(self): def test_yesno(self):
dataset = yesno.YESNO(self.root_dir) dataset = yesno.YESNO(self.root_dir)
samples = list(dataset) n_ite = 0
samples.sort(key=lambda s: s[2]) for i, (waveform, sample_rate, label) in enumerate(dataset):
for i, (waveform, sample_rate, label) in enumerate(samples):
expected_label = self.labels[i] expected_label = self.labels[i]
expected_data = self.data[i] expected_data = self.data[i]
self.assertEqual(expected_data, waveform, atol=5e-5, rtol=1e-8) self.assertEqual(expected_data, waveform, atol=5e-5, rtol=1e-8)
assert sample_rate == 8000 assert sample_rate == 8000
assert label == expected_label assert label == expected_label
n_ite += 1
assert n_ite == len(self.data)
...@@ -264,7 +264,13 @@ def walk_files(root: str, ...@@ -264,7 +264,13 @@ def walk_files(root: str,
root = os.path.expanduser(root) root = os.path.expanduser(root)
for dirpath, _, files in os.walk(root): for dirpath, dirs, files in os.walk(root):
dirs.sort()
# `dirs` is the list used in os.walk function and by sorting it in-place here, we change the
# behavior of os.walk to traverse sub directory alphabetically
# see also
# https://stackoverflow.com/questions/6670029/can-i-force-python3s-os-walk-to-visit-directories-in-alphabetical-order-how#comment71993866_6670926
files.sort()
for f in files: for f in files:
if f.endswith(suffix): if f.endswith(suffix):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment