Unverified Commit e3d1d746 authored by moto's avatar moto Committed by GitHub
Browse files

Update docstrings/documentations of all the datasets (#931)

parent 963224f5
......@@ -29,82 +29,85 @@ CMUARCTIC
~~~~~~~~~
.. autoclass:: CMUARCTIC
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
COMMONVOICE
~~~~~~~~~~~
.. autoclass:: COMMONVOICE
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
GTZAN
~~~~~
.. autoclass:: GTZAN
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
LIBRISPEECH
~~~~~~~~~~~
.. autoclass:: LIBRISPEECH
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
LIBRITTS
~~~~~~~~
.. autoclass:: LIBRITTS
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
LJSPEECH
~~~~~~~~
.. autoclass:: LJSPEECH
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
SPEECHCOMMANDS
~~~~~~~~~~~~~~
.. autoclass:: SPEECHCOMMANDS
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
TEDLIUM
~~~~~~~~~~~~~~
.. autoclass:: TEDLIUM
:members: __getitem__
:special-members: get_phoneme_dict
:members:
:special-members: __getitem__
VCTK
~~~~
.. autoclass:: VCTK
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
VCTK_092
~~~~~~~~
.. autoclass:: VCTK_092
:members:
:special-members: __getitem__
YESNO
~~~~~
.. autoclass:: YESNO
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
......@@ -76,9 +76,20 @@ def load_cmuarctic_item(line: str,
class CMUARCTIC(Dataset):
"""
Create a Dataset for CMU_arctic. Each item is a tuple of the form:
waveform, sample_rate, utterance, utterance_id
"""Create a Dataset for CMU_ARCTIC.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional):
The URL to download the dataset from or the type of the dataset to download.
(default: ``"aew"``)
Allowed type values are ``"aew"``, ``"ahw"``, ``"aup"``, ``"awb"``, ``"axb"``, ``"bdl"``,
``"clb"``, ``"eey"``, ``"fem"``, ``"gka"``, ``"jmk"``, ``"ksp"``, ``"ljm"``, ``"lnh"``,
``"rms"``, ``"rxr"``, ``"slp"`` or ``"slt"``.
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"ARCTIC"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""
_file_text = "txt.done.data"
......@@ -143,6 +154,14 @@ class CMUARCTIC(Dataset):
self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, utterance_id)``
"""
line = self._walker[n]
return load_cmuarctic_item(line, self._path, self._folder_audio, self._ext_audio)
......
......@@ -100,11 +100,28 @@ def load_commonvoice_item(line: List[str],
class COMMONVOICE(Dataset):
"""
Create a Dataset for CommonVoice. Each item is a tuple of the form:
(waveform, sample_rate, dictionary)
where dictionary is a dictionary built from the tsv file with the following keys:
client_id, path, sentence, up_votes, down_votes, age, gender, accent.
"""Create a Dataset for CommonVoice.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
tsv (str, optional): The name of the tsv file used to construct the metadata.
(default: ``"train.tsv"``)
url (str, optional): The URL to download the dataset from, or the language of
the dataset to download. (default: ``"english"``).
Allowed language values are ``"tatar"``, ``"english"``, ``"german"``,
``"french"``, ``"welsh"``, ``"breton"``, ``"chuvash"``, ``"turkish"``, ``"kyrgyz"``,
``"irish"``, ``"kabyle"``, ``"catalan"``, ``"taiwanese"``, ``"slovenian"``,
``"italian"``, ``"dutch"``, ``"hakha chin"``, ``"esperanto"``, ``"estonian"``,
``"persian"``, ``"portuguese"``, ``"basque"``, ``"spanish"``, ``"chinese"``,
``"mongolian"``, ``"sakha"``, ``"dhivehi"``, ``"kinyarwanda"``, ``"swedish"``,
``"russian"``, ``"indonesian"``, ``"arabic"``, ``"tamil"``, ``"interlingua"``,
``"latvian"``, ``"japanese"``, ``"votic"``, ``"abkhaz"``, ``"cantonese"`` and
``"romansh sursilvan"``.
folder_in_archive (str, optional): The top-level directory of the dataset.
version (str): Version string. (default: ``"cv-corpus-4-2019-12-10"``)
For other allowed values, please check out https://commonvoice.mozilla.org/en/datasets.
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""
_ext_txt = ".txt"
......@@ -192,6 +209,16 @@ class COMMONVOICE(Dataset):
self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, Dict[str, str]]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, dictionary)``, where dictionary is built
from the TSV file with the following keys: ``client_id``, ``path``, ``sentence``,
``up_votes``, ``down_votes``, ``age``, ``gender`` and ``accent``.
"""
line = self._walker[n]
return load_commonvoice_item(line, self._header, self._path, self._folder_audio)
......
import os
import warnings
from typing import Any, Tuple
from typing import Any, Tuple, Optional
import torchaudio
from torch import Tensor
......@@ -998,12 +998,22 @@ def load_gtzan_item(fileid: str, path: str, ext_audio: str) -> Tuple[Tensor, str
class GTZAN(Dataset):
"""
Create a Dataset for GTZAN. Each item is a tuple of the form:
waveform, sample_rate, label.
"""Create a Dataset for GTZAN.
Note:
Please see http://marsyas.info/downloads/datasets.html if you are planning to use
this dataset to publish results.
Please see http://marsyas.info/downloads/datasets.html
if you are planning to use this dataset to publish results.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from.
(default: ``"http://opihi.cs.uvic.ca/sound/genres.tar.gz"``)
folder_in_archive (str, optional): The top-level directory of the dataset.
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
subset (str, optional): Which subset of the dataset to use.
One of ``"training"``, ``"validation"``, ``"testing"`` or ``None``.
If ``None``, the entire dataset is used. (default: ``None``).
"""
_ext_audio = ".wav"
......@@ -1014,7 +1024,7 @@ class GTZAN(Dataset):
url: str = URL,
folder_in_archive: str = FOLDER_IN_ARCHIVE,
download: bool = False,
subset: Any = None,
subset: Optional[str] = None,
) -> None:
# super(GTZAN, self).__init__()
......@@ -1082,6 +1092,14 @@ class GTZAN(Dataset):
self._walker = filtered_test
def __getitem__(self, n: int) -> Tuple[Tensor, int, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, label)``
"""
fileid = self._walker[n]
item = load_gtzan_item(fileid, self._path, self._ext_audio)
waveform, sample_rate, label = item
......
......@@ -67,9 +67,19 @@ def load_librispeech_item(fileid: str,
class LIBRISPEECH(Dataset):
"""
Create a Dataset for LibriSpeech. Each item is a tuple of the form:
waveform, sample_rate, utterance, speaker_id, chapter_id, utterance_id
"""Create a Dataset for LibriSpeech.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from,
or the type of the dataset to download.
Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
``"test-other"``, ``"train-clean-100"``, ``"train-clean-360"`` and
``"train-other-500"``. (default: ``"train-clean-100"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"LibriSpeech"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""
_ext_txt = ".trans.txt"
......@@ -117,6 +127,14 @@ class LIBRISPEECH(Dataset):
self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, speaker_id, chapter_id, utterance_id)``
"""
fileid = self._walker[n]
return load_librispeech_item(fileid, self._path, self._ext_audio, self._ext_txt)
......
......@@ -65,9 +65,19 @@ def load_libritts_item(
class LIBRITTS(Dataset):
"""
Create a Dataset for LibriTTS. Each item is a tuple of the form:
waveform, sample_rate, original_text, normalized_text, speaker_id, chapter_id, utterance_id
"""Create a Dataset for LibriTTS.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from,
or the type of the dataset to download.
Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
``"test-other"``, ``"train-clean-100"``, ``"train-clean-360"`` and
``"train-other-500"``. (default: ``"train-clean-100"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"LibriTTS"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""
_ext_original_txt = ".original.txt"
......@@ -118,6 +128,15 @@ class LIBRITTS(Dataset):
self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, original_text, normalized_text, speaker_id,
chapter_id, utterance_id)``
"""
fileid = self._walker[n]
return load_libritts_item(
fileid,
......
......@@ -33,9 +33,16 @@ def load_ljspeech_item(line: List[str], path: str, ext_audio: str) -> Tuple[Tens
class LJSPEECH(Dataset):
"""
Create a Dataset for LJSpeech-1.1. Each item is a tuple of the form:
waveform, sample_rate, transcript, normalized_transcript
"""Create a Dataset for LJSpeech-1.1.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from.
(default: ``"https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"wavs"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""
_ext_audio = ".wav"
......@@ -68,6 +75,14 @@ class LJSPEECH(Dataset):
self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, transcript, normalized_transcript)``
"""
line = self._walker[n]
return load_ljspeech_item(line, self._path, self._ext_audio)
......
......@@ -36,9 +36,18 @@ def load_speechcommands_item(filepath: str, path: str) -> Tuple[Tensor, int, str
class SPEECHCOMMANDS(Dataset):
"""
Create a Dataset for Speech Commands. Each item is a tuple of the form:
waveform, sample_rate, label, speaker_id, utterance_number
"""Create a Dataset for Speech Commands.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from,
or the type of the dataset to download.
Allowed type values are ``"speech_commands_v0.01"`` and ``"speech_commands_v0.02"``
(default: ``"speech_commands_v0.02"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"SpeechCommands"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""
def __init__(self,
......@@ -75,6 +84,14 @@ class SPEECHCOMMANDS(Dataset):
self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, label, speaker_id, utterance_number)``
"""
fileid = self._walker[n]
return load_speechcommands_item(fileid, self._path)
......
......@@ -43,44 +43,21 @@ _RELEASE_CONFIGS = {
class TEDLIUM(Dataset):
"""
Create a Dataset for Tedlium. It supports releases 1,2 and 3, each item is a list containings:
[waveform, sample_rate, transcript, talk_id, speaker_id, identifier].
Constructor arguments:
Create a Dataset for Tedlium. It supports releases 1,2 and 3.
Args:
root (str): Path containing dataset or target path where its downloaded if needed
release (str, optional): TEDLIUM identifier (release1,release2,release3). Defaults to RELEASE.
subset (str, optional): train/dev/test for releases 1&2, None for release3. Defaults to Train/None
download (bool, optional): Download dataset in case is not founded in root path. Defaults to False.
audio_ext (str, optional): Overwrite audio extension when loading items. Defaults to ".sph".
Special functions:
_load_tedlium_item: Loads a TEDLIUM dataset sample given a file name and corresponding sentence name
_load_audio: Default load function used in TEDLIUM dataset, you can overwrite this function to customize
functionality and load individual sentences from a full ted audio talk file
get_phoneme_dict: Returns the phoneme dictionary of a TEDLIUM release
root (str): Path to the directory where the dataset is found or downloaded.
release (str, optional): Release version.
Allowed values are ``"release1"``, ``"release2"`` or ``"release3"``.
(default: ``"release1"``).
subset (str, optional): The subset of dataset to use. Valid options are ``"train"``, ``"dev"``,
and ``"test"`` for releases 1&2, ``None`` for release3. Defaults to ``"train"`` or ``None``.
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""
def __init__(
self, root: str, release: str = "release1", subset: str = None, download: bool = False, audio_ext=".sph"
) -> None:
"""Constructor for TEDLIUM dataset.
Args:
root (str): Path containing dataset or target path where its downloaded if needed
release (str, optional): TEDLIUM identifier (release1,release2,release3). Defaults to RELEASE.
subset (str, optional): train/dev/test for releases 1&2, None for release3. Defaults to Train/None
download (bool, optional): Download dataset in case is not founded in root path. Defaults to False.
audio_ext (str, optional): Overwrite audio extension when loading items. Defaults to ".sph".
Raises:
RuntimeError: If release identifier does not match any supported release,
"""
self._ext_audio = audio_ext
if release in _RELEASE_CONFIGS.keys():
folder_in_archive = _RELEASE_CONFIGS[release]["folder_in_archive"]
......@@ -140,7 +117,7 @@ class TEDLIUM(Dataset):
path (str): Dataset root path
Returns:
Tedlium_item: A namedTuple containing [waveform, sample_rate, transcript, talk_id, speaker_id, identifier]
tuple: ``(waveform, sample_rate, transcript, talk_id, speaker_id, identifier)``
"""
transcript_path = os.path.join(path, "stm", fileid)
with open(transcript_path + ".stm") as f:
......@@ -171,14 +148,13 @@ class TEDLIUM(Dataset):
return torchaudio.load(path)[:, start_time:end_time]
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]:
"""TEDLIUM dataset custom function overwritting default loadbehaviour
Loads a TEDLIUM sample given a index N.
"""Load the n-th sample from the dataset.
Args:
n (int): Index of sample to be loaded
n (int): The index of the sample to be loaded
Returns:
Tedlium_item: A namedTuple containing [waveform, sample_rate, transcript, talk_id, speaker_id, identifier]
tuple: ``(waveform, sample_rate, transcript, talk_id, speaker_id, identifier)``
"""
fileid, line = self._filelist[n]
return self._load_tedlium_item(fileid, line, self._path)
......@@ -193,10 +169,8 @@ class TEDLIUM(Dataset):
@property
def phoneme_dict(self):
"""Returns the phoneme dictionary of a TEDLIUM release.
Returns:
dictionary: Phoneme dictionary for the current tedlium release
"""dict[str, tuple[str]]: Phonemes. Mapping from word to tuple of phonemes.
Note that some words have empty phonemes.
"""
# Read phoneme dictionary
if not self._phoneme_dict:
......
......@@ -54,12 +54,25 @@ def load_vctk_item(fileid: str,
class VCTK(Dataset):
"""
Create a Dataset for VCTK. Each item is a tuple of the form:
(waveform, sample_rate, utterance, speaker_id, utterance_id)
"""Create a Dataset for VCTK.
Note:
* **This dataset is no longer publicly available.** Please use :py:class:`VCTK_092`
* Directory ``p315`` is ignored because there are no corresponding text files.
For more information about the dataset visit: https://datashare.is.ed.ac.uk/handle/10283/3443
Folder `p315` will be ignored due to the non-existent corresponding text files.
For more information about the dataset visit: https://datashare.is.ed.ac.uk/handle/10283/3443
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): Not used as the dataset is no longer publicly available.
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"VCTK-Corpus"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
Giving ``download=True`` will result in error as the dataset is no longer
publicly available.
downsample (bool, optional): Not used.
transform (callable, optional): Optional transform applied on waveform. (default: ``None``)
target_transform (callable, optional): Optional transform applied on utterance. (default: ``None``)
"""
_folder_txt = "txt"
......@@ -118,6 +131,14 @@ class VCTK(Dataset):
self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, speaker_id, utterance_id)``
"""
fileid = self._walker[n]
item = load_vctk_item(
fileid,
......@@ -145,14 +166,13 @@ class VCTK(Dataset):
class VCTK_092(Dataset):
"""Create VCTK 0.92 Dataset
An item is a ``namedtuple`` of (``waveform``, ``sample_rate``, ``utterance``,
``speaker_id``, ``utterance_id``)
Args:
root (str): Root directory where the dataset's top level directory is found.
mic_id (str): Microphone ID. Either ``"mic1"`` or ``"mic2"``
download (bool, optional): Download the dataset if not found in the given directory.
url (str, optional): URL from which the dataset is downloaded.
mic_id (str): Microphone ID. Either ``"mic1"`` or ``"mic2"``. (default: ``"mic2"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
url (str, optional): The URL to download the dataset from.
(default: ``"https://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip"``)
audio_ext (str, optional): Custom audio extension if dataset is converted to non-default audio format.
Note:
......@@ -252,6 +272,14 @@ class VCTK_092(Dataset):
return Sample(waveform, sample_rate, utterance, speaker_id, utterance_id)
def __getitem__(self, n: int) -> Sample:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, speaker_id, utterance_id)``
"""
speaker_id, utterance_id = self._sample_ids[n]
return self._load_sample(speaker_id, utterance_id, self._mic_id)
......
......@@ -31,9 +31,18 @@ def load_yesno_item(fileid: str, path: str, ext_audio: str) -> Tuple[Tensor, int
class YESNO(Dataset):
"""
Create a Dataset for YesNo. Each item is a tuple of the form:
(waveform, sample_rate, labels)
"""Create a Dataset for YesNo.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from.
(default: ``"http://www.openslr.org/resources/1/waves_yesno.tar.gz"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"waves_yesno"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
transform (callable, optional): Optional transform applied on waveform. (default: ``None``)
target_transform (callable, optional): Optional transform applied on utterance. (default: ``None``)
"""
_ext_audio = ".wav"
......@@ -78,6 +87,14 @@ class YESNO(Dataset):
self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, labels)``
"""
fileid = self._walker[n]
item = load_yesno_item(fileid, self._path, self._ext_audio)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment