Unverified Commit e3d1d746 authored by moto's avatar moto Committed by GitHub
Browse files

Update docstrings/documentations of all the datasets (#931)

parent 963224f5
...@@ -29,82 +29,85 @@ CMUARCTIC ...@@ -29,82 +29,85 @@ CMUARCTIC
~~~~~~~~~ ~~~~~~~~~
.. autoclass:: CMUARCTIC .. autoclass:: CMUARCTIC
:members: __getitem__ :members:
:special-members: :special-members: __getitem__
COMMONVOICE COMMONVOICE
~~~~~~~~~~~ ~~~~~~~~~~~
.. autoclass:: COMMONVOICE .. autoclass:: COMMONVOICE
:members: __getitem__ :members:
:special-members: :special-members: __getitem__
GTZAN GTZAN
~~~~~ ~~~~~
.. autoclass:: GTZAN .. autoclass:: GTZAN
:members: __getitem__ :members:
:special-members: :special-members: __getitem__
LIBRISPEECH LIBRISPEECH
~~~~~~~~~~~ ~~~~~~~~~~~
.. autoclass:: LIBRISPEECH .. autoclass:: LIBRISPEECH
:members: __getitem__ :members:
:special-members: :special-members: __getitem__
LIBRITTS LIBRITTS
~~~~~~~~ ~~~~~~~~
.. autoclass:: LIBRITTS .. autoclass:: LIBRITTS
:members: __getitem__ :members:
:special-members: :special-members: __getitem__
LJSPEECH LJSPEECH
~~~~~~~~ ~~~~~~~~
.. autoclass:: LJSPEECH .. autoclass:: LJSPEECH
:members: __getitem__ :members:
:special-members: :special-members: __getitem__
SPEECHCOMMANDS SPEECHCOMMANDS
~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~
.. autoclass:: SPEECHCOMMANDS .. autoclass:: SPEECHCOMMANDS
:members: __getitem__ :members:
:special-members: :special-members: __getitem__
TEDLIUM TEDLIUM
~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~
.. autoclass:: TEDLIUM .. autoclass:: TEDLIUM
:members: __getitem__ :members:
:special-members: get_phoneme_dict :special-members: __getitem__
VCTK VCTK
~~~~ ~~~~
.. autoclass:: VCTK .. autoclass:: VCTK
:members: __getitem__ :members:
:special-members: :special-members: __getitem__
VCTK_092 VCTK_092
~~~~~~~~ ~~~~~~~~
.. autoclass:: VCTK_092 .. autoclass:: VCTK_092
:members:
:special-members: __getitem__
YESNO YESNO
~~~~~ ~~~~~
.. autoclass:: YESNO .. autoclass:: YESNO
:members: __getitem__ :members:
:special-members: :special-members: __getitem__
...@@ -76,9 +76,20 @@ def load_cmuarctic_item(line: str, ...@@ -76,9 +76,20 @@ def load_cmuarctic_item(line: str,
class CMUARCTIC(Dataset): class CMUARCTIC(Dataset):
""" """Create a Dataset for CMU_ARCTIC.
Create a Dataset for CMU_arctic. Each item is a tuple of the form:
waveform, sample_rate, utterance, utterance_id Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional):
The URL to download the dataset from or the type of the dataset to download.
(default: ``"aew"``)
Allowed type values are ``"aew"``, ``"ahw"``, ``"aup"``, ``"awb"``, ``"axb"``, ``"bdl"``,
``"clb"``, ``"eey"``, ``"fem"``, ``"gka"``, ``"jmk"``, ``"ksp"``, ``"ljm"``, ``"lnh"``,
``"rms"``, ``"rxr"``, ``"slp"`` or ``"slt"``.
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"ARCTIC"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
""" """
_file_text = "txt.done.data" _file_text = "txt.done.data"
...@@ -143,6 +154,14 @@ class CMUARCTIC(Dataset): ...@@ -143,6 +154,14 @@ class CMUARCTIC(Dataset):
self._walker = list(walker) self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]: def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, utterance_id)``
"""
line = self._walker[n] line = self._walker[n]
return load_cmuarctic_item(line, self._path, self._folder_audio, self._ext_audio) return load_cmuarctic_item(line, self._path, self._folder_audio, self._ext_audio)
......
...@@ -100,11 +100,28 @@ def load_commonvoice_item(line: List[str], ...@@ -100,11 +100,28 @@ def load_commonvoice_item(line: List[str],
class COMMONVOICE(Dataset): class COMMONVOICE(Dataset):
""" """Create a Dataset for CommonVoice.
Create a Dataset for CommonVoice. Each item is a tuple of the form:
(waveform, sample_rate, dictionary) Args:
where dictionary is a dictionary built from the tsv file with the following keys: root (str): Path to the directory where the dataset is found or downloaded.
client_id, path, sentence, up_votes, down_votes, age, gender, accent. tsv (str, optional): The name of the tsv file used to construct the metadata.
(default: ``"train.tsv"``)
url (str, optional): The URL to download the dataset from, or the language of
the dataset to download. (default: ``"english"``).
Allowed language values are ``"tatar"``, ``"english"``, ``"german"``,
``"french"``, ``"welsh"``, ``"breton"``, ``"chuvash"``, ``"turkish"``, ``"kyrgyz"``,
``"irish"``, ``"kabyle"``, ``"catalan"``, ``"taiwanese"``, ``"slovenian"``,
``"italian"``, ``"dutch"``, ``"hakha chin"``, ``"esperanto"``, ``"estonian"``,
``"persian"``, ``"portuguese"``, ``"basque"``, ``"spanish"``, ``"chinese"``,
``"mongolian"``, ``"sakha"``, ``"dhivehi"``, ``"kinyarwanda"``, ``"swedish"``,
``"russian"``, ``"indonesian"``, ``"arabic"``, ``"tamil"``, ``"interlingua"``,
``"latvian"``, ``"japanese"``, ``"votic"``, ``"abkhaz"``, ``"cantonese"`` and
``"romansh sursilvan"``.
folder_in_archive (str, optional): The top-level directory of the dataset.
version (str): Version string. (default: ``"cv-corpus-4-2019-12-10"``)
For the other allowed values, please check out https://commonvoice.mozilla.org/en/datasets.
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
""" """
_ext_txt = ".txt" _ext_txt = ".txt"
...@@ -192,6 +209,16 @@ class COMMONVOICE(Dataset): ...@@ -192,6 +209,16 @@ class COMMONVOICE(Dataset):
self._walker = list(walker) self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, Dict[str, str]]: def __getitem__(self, n: int) -> Tuple[Tensor, int, Dict[str, str]]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, dictionary)``, where dictionary is built
from the TSV file with the following keys: ``client_id``, ``path``, ``sentence``,
``up_votes``, ``down_votes``, ``age``, ``gender`` and ``accent``.
"""
line = self._walker[n] line = self._walker[n]
return load_commonvoice_item(line, self._header, self._path, self._folder_audio) return load_commonvoice_item(line, self._header, self._path, self._folder_audio)
......
import os import os
import warnings import warnings
from typing import Any, Tuple from typing import Any, Tuple, Optional
import torchaudio import torchaudio
from torch import Tensor from torch import Tensor
...@@ -998,12 +998,22 @@ def load_gtzan_item(fileid: str, path: str, ext_audio: str) -> Tuple[Tensor, str ...@@ -998,12 +998,22 @@ def load_gtzan_item(fileid: str, path: str, ext_audio: str) -> Tuple[Tensor, str
class GTZAN(Dataset): class GTZAN(Dataset):
""" """Create a Dataset for GTZAN.
Create a Dataset for GTZAN. Each item is a tuple of the form:
waveform, sample_rate, label. Note:
Please see http://marsyas.info/downloads/datasets.html if you are planning to use
this dataset to publish results.
Please see http://marsyas.info/downloads/datasets.html Args:
if you are planning to use this dataset to publish results. root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from.
(default: ``"http://opihi.cs.uvic.ca/sound/genres.tar.gz"``)
folder_in_archive (str, optional): The top-level directory of the dataset.
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
subset (str, optional): Which subset of the dataset to use.
One of ``"training"``, ``"validation"``, ``"testing"`` or ``None``.
If ``None``, the entire dataset is used. (default: ``None``).
""" """
_ext_audio = ".wav" _ext_audio = ".wav"
...@@ -1014,7 +1024,7 @@ class GTZAN(Dataset): ...@@ -1014,7 +1024,7 @@ class GTZAN(Dataset):
url: str = URL, url: str = URL,
folder_in_archive: str = FOLDER_IN_ARCHIVE, folder_in_archive: str = FOLDER_IN_ARCHIVE,
download: bool = False, download: bool = False,
subset: Any = None, subset: Optional[str] = None,
) -> None: ) -> None:
# super(GTZAN, self).__init__() # super(GTZAN, self).__init__()
...@@ -1082,6 +1092,14 @@ class GTZAN(Dataset): ...@@ -1082,6 +1092,14 @@ class GTZAN(Dataset):
self._walker = filtered_test self._walker = filtered_test
def __getitem__(self, n: int) -> Tuple[Tensor, int, str]: def __getitem__(self, n: int) -> Tuple[Tensor, int, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, label)``
"""
fileid = self._walker[n] fileid = self._walker[n]
item = load_gtzan_item(fileid, self._path, self._ext_audio) item = load_gtzan_item(fileid, self._path, self._ext_audio)
waveform, sample_rate, label = item waveform, sample_rate, label = item
......
...@@ -67,9 +67,19 @@ def load_librispeech_item(fileid: str, ...@@ -67,9 +67,19 @@ def load_librispeech_item(fileid: str,
class LIBRISPEECH(Dataset): class LIBRISPEECH(Dataset):
""" """Create a Dataset for LibriSpeech.
Create a Dataset for LibriSpeech. Each item is a tuple of the form:
waveform, sample_rate, utterance, speaker_id, chapter_id, utterance_id Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from,
or the type of the dataset to download.
Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
``"test-other"``, ``"train-clean-100"``, ``"train-clean-360"`` and
``"train-other-500"``. (default: ``"train-clean-100"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"LibriSpeech"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
""" """
_ext_txt = ".trans.txt" _ext_txt = ".trans.txt"
...@@ -117,6 +127,14 @@ class LIBRISPEECH(Dataset): ...@@ -117,6 +127,14 @@ class LIBRISPEECH(Dataset):
self._walker = list(walker) self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]: def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, speaker_id, chapter_id, utterance_id)``
"""
fileid = self._walker[n] fileid = self._walker[n]
return load_librispeech_item(fileid, self._path, self._ext_audio, self._ext_txt) return load_librispeech_item(fileid, self._path, self._ext_audio, self._ext_txt)
......
...@@ -65,9 +65,19 @@ def load_libritts_item( ...@@ -65,9 +65,19 @@ def load_libritts_item(
class LIBRITTS(Dataset): class LIBRITTS(Dataset):
""" """Create a Dataset for LibriTTS.
Create a Dataset for LibriTTS. Each item is a tuple of the form:
waveform, sample_rate, original_text, normalized_text, speaker_id, chapter_id, utterance_id Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from,
or the type of the dataset to download.
Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
``"test-other"``, ``"train-clean-100"``, ``"train-clean-360"`` and
``"train-other-500"``. (default: ``"train-clean-100"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"LibriTTS"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
""" """
_ext_original_txt = ".original.txt" _ext_original_txt = ".original.txt"
...@@ -118,6 +128,15 @@ class LIBRITTS(Dataset): ...@@ -118,6 +128,15 @@ class LIBRITTS(Dataset):
self._walker = list(walker) self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]: def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, original_text, normalized_text, speaker_id,
chapter_id, utterance_id)``
"""
fileid = self._walker[n] fileid = self._walker[n]
return load_libritts_item( return load_libritts_item(
fileid, fileid,
......
...@@ -33,9 +33,16 @@ def load_ljspeech_item(line: List[str], path: str, ext_audio: str) -> Tuple[Tens ...@@ -33,9 +33,16 @@ def load_ljspeech_item(line: List[str], path: str, ext_audio: str) -> Tuple[Tens
class LJSPEECH(Dataset): class LJSPEECH(Dataset):
""" """Create a Dataset for LJSpeech-1.1.
Create a Dataset for LJSpeech-1.1. Each item is a tuple of the form:
waveform, sample_rate, transcript, normalized_transcript Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from.
(default: ``"https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"wavs"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
""" """
_ext_audio = ".wav" _ext_audio = ".wav"
...@@ -68,6 +75,14 @@ class LJSPEECH(Dataset): ...@@ -68,6 +75,14 @@ class LJSPEECH(Dataset):
self._walker = list(walker) self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]: def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, transcript, normalized_transcript)``
"""
line = self._walker[n] line = self._walker[n]
return load_ljspeech_item(line, self._path, self._ext_audio) return load_ljspeech_item(line, self._path, self._ext_audio)
......
...@@ -36,9 +36,18 @@ def load_speechcommands_item(filepath: str, path: str) -> Tuple[Tensor, int, str ...@@ -36,9 +36,18 @@ def load_speechcommands_item(filepath: str, path: str) -> Tuple[Tensor, int, str
class SPEECHCOMMANDS(Dataset): class SPEECHCOMMANDS(Dataset):
""" """Create a Dataset for Speech Commands.
Create a Dataset for Speech Commands. Each item is a tuple of the form:
waveform, sample_rate, label, speaker_id, utterance_number Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from,
or the type of the dataset to download.
Allowed type values are ``"speech_commands_v0.01"`` and ``"speech_commands_v0.02"``
(default: ``"speech_commands_v0.02"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"SpeechCommands"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
""" """
def __init__(self, def __init__(self,
...@@ -75,6 +84,14 @@ class SPEECHCOMMANDS(Dataset): ...@@ -75,6 +84,14 @@ class SPEECHCOMMANDS(Dataset):
self._walker = list(walker) self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int]: def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, label, speaker_id, utterance_number)``
"""
fileid = self._walker[n] fileid = self._walker[n]
return load_speechcommands_item(fileid, self._path) return load_speechcommands_item(fileid, self._path)
......
...@@ -43,44 +43,21 @@ _RELEASE_CONFIGS = { ...@@ -43,44 +43,21 @@ _RELEASE_CONFIGS = {
class TEDLIUM(Dataset): class TEDLIUM(Dataset):
""" """
Create a Dataset for Tedlium. It supports releases 1,2 and 3, each item is a list containings: Create a Dataset for Tedlium. It supports releases 1,2 and 3.
[waveform, sample_rate, transcript, talk_id, speaker_id, identifier].
Constructor arguments:
Args: Args:
root (str): Path containing dataset or target path where its downloaded if needed root (str): Path to the directory where the dataset is found or downloaded.
release (str, optional): TEDLIUM identifier (release1,release2,release3). Defaults to RELEASE. release (str, optional): Release version.
subset (str, optional): train/dev/test for releases 1&2, None for release3. Defaults to Train/None Allowed values are ``"release1"``, ``"release2"`` or ``"release3"``.
download (bool, optional): Download dataset in case is not founded in root path. Defaults to False. (default: ``"release1"``).
audio_ext (str, optional): Overwrite audio extension when loading items. Defaults to ".sph". subset (str, optional): The subset of dataset to use. Valid options are ``"train"``, ``"dev"``,
and ``"test"`` for releases 1&2, ``None`` for release3. Defaults to ``"train"`` or ``None``.
Special functions: download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
_load_tedlium_item: Loads a TEDLIUM dataset sample given a file name and corresponding sentence name
_load_audio: Default load function used in TEDLIUM dataset, you can overwrite this function to customize
functionality and load individual sentences from a full ted audio talk file
get_phoneme_dict: Returns the phoneme dictionary of a TEDLIUM release
""" """
def __init__( def __init__(
self, root: str, release: str = "release1", subset: str = None, download: bool = False, audio_ext=".sph" self, root: str, release: str = "release1", subset: str = None, download: bool = False, audio_ext=".sph"
) -> None: ) -> None:
"""Constructor for TEDLIUM dataset.
Args:
root (str): Path containing dataset or target path where its downloaded if needed
release (str, optional): TEDLIUM identifier (release1,release2,release3). Defaults to RELEASE.
subset (str, optional): train/dev/test for releases 1&2, None for release3. Defaults to Train/None
download (bool, optional): Download dataset in case is not founded in root path. Defaults to False.
audio_ext (str, optional): Overwrite audio extension when loading items. Defaults to ".sph".
Raises:
RuntimeError: If release identifier does not match any supported release,
"""
self._ext_audio = audio_ext self._ext_audio = audio_ext
if release in _RELEASE_CONFIGS.keys(): if release in _RELEASE_CONFIGS.keys():
folder_in_archive = _RELEASE_CONFIGS[release]["folder_in_archive"] folder_in_archive = _RELEASE_CONFIGS[release]["folder_in_archive"]
...@@ -140,7 +117,7 @@ class TEDLIUM(Dataset): ...@@ -140,7 +117,7 @@ class TEDLIUM(Dataset):
path (str): Dataset root path path (str): Dataset root path
Returns: Returns:
Tedlium_item: A namedTuple containing [waveform, sample_rate, transcript, talk_id, speaker_id, identifier] tuple: ``(waveform, sample_rate, transcript, talk_id, speaker_id, identifier)``
""" """
transcript_path = os.path.join(path, "stm", fileid) transcript_path = os.path.join(path, "stm", fileid)
with open(transcript_path + ".stm") as f: with open(transcript_path + ".stm") as f:
...@@ -171,14 +148,13 @@ class TEDLIUM(Dataset): ...@@ -171,14 +148,13 @@ class TEDLIUM(Dataset):
return torchaudio.load(path)[:, start_time:end_time] return torchaudio.load(path)[:, start_time:end_time]
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]: def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]:
"""TEDLIUM dataset custom function overwritting default loadbehaviour """Load the n-th sample from the dataset.
Loads a TEDLIUM sample given a index N.
Args: Args:
n (int): Index of sample to be loaded n (int): The index of the sample to be loaded
Returns: Returns:
Tedlium_item: A namedTuple containing [waveform, sample_rate, transcript, talk_id, speaker_id, identifier] tuple: ``(waveform, sample_rate, transcript, talk_id, speaker_id, identifier)``
""" """
fileid, line = self._filelist[n] fileid, line = self._filelist[n]
return self._load_tedlium_item(fileid, line, self._path) return self._load_tedlium_item(fileid, line, self._path)
...@@ -193,10 +169,8 @@ class TEDLIUM(Dataset): ...@@ -193,10 +169,8 @@ class TEDLIUM(Dataset):
@property @property
def phoneme_dict(self): def phoneme_dict(self):
"""Returns the phoneme dictionary of a TEDLIUM release. """dict[str, tuple[str]]: Phonemes. Mapping from word to tuple of phonemes.
Note that some words have empty phonemes.
Returns:
dictionary: Phoneme dictionary for the current tedlium release
""" """
# Read phoneme dictionary # Read phoneme dictionary
if not self._phoneme_dict: if not self._phoneme_dict:
......
...@@ -54,12 +54,25 @@ def load_vctk_item(fileid: str, ...@@ -54,12 +54,25 @@ def load_vctk_item(fileid: str,
class VCTK(Dataset): class VCTK(Dataset):
""" """Create a Dataset for VCTK.
Create a Dataset for VCTK. Each item is a tuple of the form:
(waveform, sample_rate, utterance, speaker_id, utterance_id) Note:
* **This dataset is no longer publicly available.** Please use :py:class:`VCTK_092`
* Directory ``p315`` is ignored because there are no corresponding text files.
For more information about the dataset visit: https://datashare.is.ed.ac.uk/handle/10283/3443
Folder `p315` will be ignored due to the non-existent corresponding text files. Args:
For more information about the dataset visit: https://datashare.is.ed.ac.uk/handle/10283/3443 root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): Not used as the dataset is no longer publicly available.
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"VCTK-Corpus"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
Giving ``download=True`` will result in error as the dataset is no longer
publicly available.
downsample (bool, optional): Not used.
transform (callable, optional): Optional transform applied on waveform. (default: ``None``)
target_transform (callable, optional): Optional transform applied on utterance. (default: ``None``)
""" """
_folder_txt = "txt" _folder_txt = "txt"
...@@ -118,6 +131,14 @@ class VCTK(Dataset): ...@@ -118,6 +131,14 @@ class VCTK(Dataset):
self._walker = list(walker) self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, str]: def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, speaker_id, utterance_id)``
"""
fileid = self._walker[n] fileid = self._walker[n]
item = load_vctk_item( item = load_vctk_item(
fileid, fileid,
...@@ -145,14 +166,13 @@ class VCTK(Dataset): ...@@ -145,14 +166,13 @@ class VCTK(Dataset):
class VCTK_092(Dataset): class VCTK_092(Dataset):
"""Create VCTK 0.92 Dataset """Create VCTK 0.92 Dataset
An item is a ``namedtuple`` of (``waveform``, ``sample_rate``, ``utterance``,
``speaker_id``, ``utterance_id``)
Args: Args:
root (str): Root directory where the dataset's top level directory is found. root (str): Root directory where the dataset's top level directory is found.
mic_id (str): Microphone ID. Either ``"mic1"`` or ``"mic2"`` mic_id (str): Microphone ID. Either ``"mic1"`` or ``"mic2"``. (default: ``"mic2"``)
download (bool, optional): Download the dataset if not found in the given directory. download (bool, optional):
url (str, optional): URL from which the dataset is downloaded. Whether to download the dataset if it is not found at root path. (default: ``False``).
url (str, optional): The URL to download the dataset from.
(default: ``"https://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip"``)
audio_ext (str, optional): Custom audio extension if dataset is converted to non-default audio format. audio_ext (str, optional): Custom audio extension if dataset is converted to non-default audio format.
Note: Note:
...@@ -252,6 +272,14 @@ class VCTK_092(Dataset): ...@@ -252,6 +272,14 @@ class VCTK_092(Dataset):
return Sample(waveform, sample_rate, utterance, speaker_id, utterance_id) return Sample(waveform, sample_rate, utterance, speaker_id, utterance_id)
def __getitem__(self, n: int) -> Sample: def __getitem__(self, n: int) -> Sample:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, speaker_id, utterance_id)``
"""
speaker_id, utterance_id = self._sample_ids[n] speaker_id, utterance_id = self._sample_ids[n]
return self._load_sample(speaker_id, utterance_id, self._mic_id) return self._load_sample(speaker_id, utterance_id, self._mic_id)
......
...@@ -31,9 +31,18 @@ def load_yesno_item(fileid: str, path: str, ext_audio: str) -> Tuple[Tensor, int ...@@ -31,9 +31,18 @@ def load_yesno_item(fileid: str, path: str, ext_audio: str) -> Tuple[Tensor, int
class YESNO(Dataset): class YESNO(Dataset):
""" """Create a Dataset for YesNo.
Create a Dataset for YesNo. Each item is a tuple of the form:
(waveform, sample_rate, labels) Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from.
(default: ``"http://www.openslr.org/resources/1/waves_yesno.tar.gz"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"waves_yesno"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
transform (callable, optional): Optional transform applied on waveform. (default: ``None``)
target_transform (callable, optional): Optional transform applied on utterance. (default: ``None``)
""" """
_ext_audio = ".wav" _ext_audio = ".wav"
...@@ -78,6 +87,14 @@ class YESNO(Dataset): ...@@ -78,6 +87,14 @@ class YESNO(Dataset):
self._walker = list(walker) self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]: def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, labels)``
"""
fileid = self._walker[n] fileid = self._walker[n]
item = load_yesno_item(fileid, self._path, self._ext_audio) item = load_yesno_item(fileid, self._path, self._ext_audio)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment