Add pathlib support for LIBRITTS and LIBRISPEECH (#1046)

b5c16d33 · Bhargav Kathivarapu · GitHub · 37b4e136 · b5c16d33 · b5c16d33
Unverified Commit b5c16d33 authored Nov 19, 2020 by Bhargav Kathivarapu Committed by GitHub Nov 18, 2020
4 changed files
--- a/test/torchaudio_unittest/datasets/librispeech_test.py
+++ b/test/torchaudio_unittest/datasets/librispeech_test.py
 import os
+from pathlib import Path

 from torchaudio.datasets import librispeech

@@ -91,11 +92,7 @@ class TestLibriSpeech(TempDirMixin, TorchaudioTestCase):
        # In case of test failure
        librispeech.LIBRISPEECH._ext_audio = '.flac'

-    def test_librispeech(self):
-        librispeech.LIBRISPEECH._ext_audio = '.wav'
-        dataset = librispeech.LIBRISPEECH(self.root_dir)
-        print(dataset._path)
-
+    def _test_librispeech(self, dataset):
        num_samples = 0
        for i, (
            data, sample_rate, utterance, speaker_id, chapter_id, utterance_id
@@ -110,3 +107,13 @@ class TestLibriSpeech(TempDirMixin, TorchaudioTestCase):

        assert num_samples == len(self.samples)
        librispeech.LIBRISPEECH._ext_audio = '.flac'
+
+    def test_librispeech_str(self):
+        librispeech.LIBRISPEECH._ext_audio = '.wav'
+        dataset = librispeech.LIBRISPEECH(self.root_dir)
+        self._test_librispeech(dataset)
+
+    def test_librispeech_path(self):
+        librispeech.LIBRISPEECH._ext_audio = '.wav'
+        dataset = librispeech.LIBRISPEECH(Path(self.root_dir))
+        self._test_librispeech(dataset)
--- a/test/torchaudio_unittest/datasets/libritts_test.py
+++ b/test/torchaudio_unittest/datasets/libritts_test.py
 import os
+from pathlib import Path

 from torchaudio.datasets.libritts import LIBRITTS

@@ -47,8 +48,7 @@ class TestLibriTTS(TempDirMixin, TorchaudioTestCase):
            with open(path_normalized, 'w') as file_:
                file_.write(cls.normalized_text)

-    def test_libritts(self):
-        dataset = LIBRITTS(self.root_dir)
+    def _test_libritts(self, dataset):
        n_ites = 0
        for i, (waveform,
                sample_rate,
@@ -69,3 +69,11 @@ class TestLibriTTS(TempDirMixin, TorchaudioTestCase):
            assert utterance_id == f'{"_".join(str(u) for u in expected_ids[-4:])}'
            n_ites += 1
        assert n_ites == len(self.utterance_ids)
+
+    def test_libritts_str(self):
+        dataset = LIBRITTS(self.root_dir)
+        self._test_libritts(dataset)
+
+    def test_libritts_path(self):
+        dataset = LIBRITTS(Path(self.root_dir))
+        self._test_libritts(dataset)
--- a/torchaudio/datasets/librispeech.py
+++ b/torchaudio/datasets/librispeech.py
 import os
-from typing import Tuple
+from typing import Tuple, Union
+from pathlib import Path

 import torchaudio
 from torch import Tensor
@@ -70,7 +71,7 @@ class LIBRISPEECH(Dataset):
    """Create a Dataset for LibriSpeech.

    Args:
-        root (str): Path to the directory where the dataset is found or downloaded.
+        root (str or Path): Path to the directory where the dataset is found or downloaded.
        url (str, optional): The URL to download the dataset from,
            or the type of the dataset to download.
            Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
@@ -86,7 +87,7 @@ class LIBRISPEECH(Dataset):
    _ext_audio = ".flac"

    def __init__(self,
-                 root: str,
+                 root: Union[str, Path],
                 url: str = URL,
                 folder_in_archive: str = FOLDER_IN_ARCHIVE,
                 download: bool = False) -> None:
@@ -106,6 +107,9 @@ class LIBRISPEECH(Dataset):

            url = os.path.join(base_url, url + ext_archive)

+        # Get string representation of 'root' in case Path object is passed
+        root = os.fspath(root)
+
        basename = os.path.basename(url)
        archive = os.path.join(root, basename)


--- a/torchaudio/datasets/libritts.py
+++ b/torchaudio/datasets/libritts.py
 import os
-from typing import Tuple
+from typing import Tuple, Union
+from pathlib import Path

 import torchaudio
 from torch import Tensor
@@ -68,7 +69,7 @@ class LIBRITTS(Dataset):
    """Create a Dataset for LibriTTS.

    Args:
-        root (str): Path to the directory where the dataset is found or downloaded.
+        root (str or Path): Path to the directory where the dataset is found or downloaded.
        url (str, optional): The URL to download the dataset from,
            or the type of the dataset to download.
            Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
@@ -86,7 +87,7 @@ class LIBRITTS(Dataset):

    def __init__(
        self,
-        root: str,
+        root: Union[str, Path],
        url: str = URL,
        folder_in_archive: str = FOLDER_IN_ARCHIVE,
        download: bool = False,
@@ -107,6 +108,9 @@ class LIBRITTS(Dataset):

            url = os.path.join(base_url, url + ext_archive)

+        # Get string representation of 'root' in case Path object is passed
+        root = os.fspath(root)
+
        basename = os.path.basename(url)
        archive = os.path.join(root, basename)