Unverified Commit e3d1d746 authored by moto's avatar moto Committed by GitHub
Browse files

Update docstrings/documentations of all the datasets (#931)

parent 963224f5
......@@ -29,82 +29,85 @@ CMUARCTIC
~~~~~~~~~
.. autoclass:: CMUARCTIC
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
COMMONVOICE
~~~~~~~~~~~
.. autoclass:: COMMONVOICE
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
GTZAN
~~~~~
.. autoclass:: GTZAN
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
LIBRISPEECH
~~~~~~~~~~~
.. autoclass:: LIBRISPEECH
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
LIBRITTS
~~~~~~~~
.. autoclass:: LIBRITTS
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
LJSPEECH
~~~~~~~~
.. autoclass:: LJSPEECH
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
SPEECHCOMMANDS
~~~~~~~~~~~~~~
.. autoclass:: SPEECHCOMMANDS
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
TEDLIUM
~~~~~~~~~~~~~~
.. autoclass:: TEDLIUM
:members: __getitem__
:special-members: get_phoneme_dict
:members:
:special-members: __getitem__
VCTK
~~~~
.. autoclass:: VCTK
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
VCTK_092
~~~~~~~~
.. autoclass:: VCTK_092
:members:
:special-members: __getitem__
YESNO
~~~~~
.. autoclass:: YESNO
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
......@@ -76,9 +76,20 @@ def load_cmuarctic_item(line: str,
class CMUARCTIC(Dataset):
"""
Create a Dataset for CMU_arctic. Each item is a tuple of the form:
waveform, sample_rate, utterance, utterance_id
"""Create a Dataset for CMU_ARCTIC.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional):
The URL to download the dataset from or the type of the dataset to download.
(default: ``"aew"``)
Allowed type values are ``"aew"``, ``"ahw"``, ``"aup"``, ``"awb"``, ``"axb"``, ``"bdl"``,
``"clb"``, ``"eey"``, ``"fem"``, ``"gka"``, ``"jmk"``, ``"ksp"``, ``"ljm"``, ``"lnh"``,
``"rms"``, ``"rxr"``, ``"slp"`` or ``"slt"``.
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"ARCTIC"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""
_file_text = "txt.done.data"
......@@ -143,6 +154,14 @@ class CMUARCTIC(Dataset):
self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, utterance_id)``
"""
line = self._walker[n]
return load_cmuarctic_item(line, self._path, self._folder_audio, self._ext_audio)
......
......@@ -100,11 +100,28 @@ def load_commonvoice_item(line: List[str],
class COMMONVOICE(Dataset):
"""
Create a Dataset for CommonVoice. Each item is a tuple of the form:
(waveform, sample_rate, dictionary)
where dictionary is a dictionary built from the tsv file with the following keys:
client_id, path, sentence, up_votes, down_votes, age, gender, accent.
"""Create a Dataset for CommonVoice.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
tsv (str, optional): The name of the tsv file used to construct the metadata.
(default: ``"train.tsv"``)
url (str, optional): The URL to download the dataset from, or the language of
the dataset to download. (default: ``"english"``).
Allowed language values are ``"tatar"``, ``"english"``, ``"german"``,
``"french"``, ``"welsh"``, ``"breton"``, ``"chuvash"``, ``"turkish"``, ``"kyrgyz"``,
``"irish"``, ``"kabyle"``, ``"catalan"``, ``"taiwanese"``, ``"slovenian"``,
``"italian"``, ``"dutch"``, ``"hakha chin"``, ``"esperanto"``, ``"estonian"``,
``"persian"``, ``"portuguese"``, ``"basque"``, ``"spanish"``, ``"chinese"``,
``"mongolian"``, ``"sakha"``, ``"dhivehi"``, ``"kinyarwanda"``, ``"swedish"``,
``"russian"``, ``"indonesian"``, ``"arabic"``, ``"tamil"``, ``"interlingua"``,
``"latvian"``, ``"japanese"``, ``"votic"``, ``"abkhaz"``, ``"cantonese"`` and
``"romansh sursilvan"``.
folder_in_archive (str, optional): The top-level directory of the dataset.
version (str): Version string. (default: ``"cv-corpus-4-2019-12-10"``)
For other allowed values, please check out https://commonvoice.mozilla.org/en/datasets.
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""
_ext_txt = ".txt"
......@@ -192,6 +209,16 @@ class COMMONVOICE(Dataset):
self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, Dict[str, str]]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, dictionary)``, where dictionary is built
from the TSV file with the following keys: ``client_id``, ``path``, ``sentence``,
``up_votes``, ``down_votes``, ``age``, ``gender`` and ``accent``.
"""
line = self._walker[n]
return load_commonvoice_item(line, self._header, self._path, self._folder_audio)
......
import os
import warnings
from typing import Any, Tuple
from typing import Any, Tuple, Optional
import torchaudio
from torch import Tensor
......@@ -998,12 +998,22 @@ def load_gtzan_item(fileid: str, path: str, ext_audio: str) -> Tuple[Tensor, str
class GTZAN(Dataset):
"""
Create a Dataset for GTZAN. Each item is a tuple of the form:
waveform, sample_rate, label.
"""Create a Dataset for GTZAN.
Note:
Please see http://marsyas.info/downloads/datasets.html if you are planning to use
this dataset to publish results.
Please see http://marsyas.info/downloads/datasets.html
if you are planning to use this dataset to publish results.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from.
(default: ``"http://opihi.cs.uvic.ca/sound/genres.tar.gz"``)
folder_in_archive (str, optional): The top-level directory of the dataset.
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
subset (str, optional): Which subset of the dataset to use.
One of ``"training"``, ``"validation"``, ``"testing"`` or ``None``.
If ``None``, the entire dataset is used. (default: ``None``).
"""
_ext_audio = ".wav"
......@@ -1014,7 +1024,7 @@ class GTZAN(Dataset):
url: str = URL,
folder_in_archive: str = FOLDER_IN_ARCHIVE,
download: bool = False,
subset: Any = None,
subset: Optional[str] = None,
) -> None:
# super(GTZAN, self).__init__()
......@@ -1082,6 +1092,14 @@ class GTZAN(Dataset):
self._walker = filtered_test
def __getitem__(self, n: int) -> Tuple[Tensor, int, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, label)``
"""
fileid = self._walker[n]
item = load_gtzan_item(fileid, self._path, self._ext_audio)
waveform, sample_rate, label = item
......
......@@ -67,9 +67,19 @@ def load_librispeech_item(fileid: str,
class LIBRISPEECH(Dataset):
"""
Create a Dataset for LibriSpeech. Each item is a tuple of the form:
waveform, sample_rate, utterance, speaker_id, chapter_id, utterance_id
"""Create a Dataset for LibriSpeech.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from,
or the type of the dataset to download.
Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
``"test-other"``, ``"train-clean-100"``, ``"train-clean-360"`` and
``"train-other-500"``. (default: ``"train-clean-100"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"LibriSpeech"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""
_ext_txt = ".trans.txt"
......@@ -117,6 +127,14 @@ class LIBRISPEECH(Dataset):
self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, speaker_id, chapter_id, utterance_id)``
"""
fileid = self._walker[n]
return load_librispeech_item(fileid, self._path, self._ext_audio, self._ext_txt)
......
......@@ -65,9 +65,19 @@ def load_libritts_item(
class LIBRITTS(Dataset):
"""
Create a Dataset for LibriTTS. Each item is a tuple of the form:
waveform, sample_rate, original_text, normalized_text, speaker_id, chapter_id, utterance_id
"""Create a Dataset for LibriTTS.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from,
or the type of the dataset to download.
Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
``"test-other"``, ``"train-clean-100"``, ``"train-clean-360"`` and
``"train-other-500"``. (default: ``"train-clean-100"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"LibriTTS"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""
_ext_original_txt = ".original.txt"
......@@ -118,6 +128,15 @@ class LIBRITTS(Dataset):
self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, original_text, normalized_text, speaker_id,
chapter_id, utterance_id)``
"""
fileid = self._walker[n]
return load_libritts_item(
fileid,
......
......@@ -33,9 +33,16 @@ def load_ljspeech_item(line: List[str], path: str, ext_audio: str) -> Tuple[Tens
class LJSPEECH(Dataset):
"""
Create a Dataset for LJSpeech-1.1. Each item is a tuple of the form:
waveform, sample_rate, transcript, normalized_transcript
"""Create a Dataset for LJSpeech-1.1.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from.
(default: ``"https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"wavs"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""
_ext_audio = ".wav"
......@@ -68,6 +75,14 @@ class LJSPEECH(Dataset):
self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, transcript, normalized_transcript)``
"""
line = self._walker[n]
return load_ljspeech_item(line, self._path, self._ext_audio)
......
......@@ -36,9 +36,18 @@ def load_speechcommands_item(filepath: str, path: str) -> Tuple[Tensor, int, str
class SPEECHCOMMANDS(Dataset):
"""
Create a Dataset for Speech Commands. Each item is a tuple of the form:
waveform, sample_rate, label, speaker_id, utterance_number
"""Create a Dataset for Speech Commands.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from,
or the type of the dataset to download.
Allowed type values are ``"speech_commands_v0.01"`` and ``"speech_commands_v0.02"``
(default: ``"speech_commands_v0.02"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"SpeechCommands"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""
def __init__(self,
......@@ -75,6 +84,14 @@ class SPEECHCOMMANDS(Dataset):
self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, label, speaker_id, utterance_number)``
"""
fileid = self._walker[n]
return load_speechcommands_item(fileid, self._path)
......
......@@ -43,44 +43,21 @@ _RELEASE_CONFIGS = {
class TEDLIUM(Dataset):
"""
Create a Dataset for Tedlium. It supports releases 1,2 and 3, each item is a list containings:
[waveform, sample_rate, transcript, talk_id, speaker_id, identifier].
Constructor arguments:
Create a Dataset for Tedlium. It supports releases 1,2 and 3.
Args:
root (str): Path containing dataset or target path where its downloaded if needed
release (str, optional): TEDLIUM identifier (release1,release2,release3). Defaults to RELEASE.
subset (str, optional): train/dev/test for releases 1&2, None for release3. Defaults to Train/None
download (bool, optional): Download dataset in case is not founded in root path. Defaults to False.
audio_ext (str, optional): Overwrite audio extension when loading items. Defaults to ".sph".
Special functions:
_load_tedlium_item: Loads a TEDLIUM dataset sample given a file name and corresponding sentence name
_load_audio: Default load function used in TEDLIUM dataset, you can overwrite this function to customize
functionality and load individual sentences from a full ted audio talk file
get_phoneme_dict: Returns the phoneme dictionary of a TEDLIUM release
root (str): Path to the directory where the dataset is found or downloaded.
release (str, optional): Release version.
Allowed values are ``"release1"``, ``"release2"`` or ``"release3"``.
(default: ``"release1"``).
subset (str, optional): The subset of dataset to use. Valid options are ``"train"``, ``"dev"``,
and ``"test"`` for releases 1&2, ``None`` for release3. Defaults to ``"train"`` or ``None``.
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""
def __init__(
self, root: str, release: str = "release1", subset: str = None, download: bool = False, audio_ext=".sph"
) -> None:
"""Constructor for TEDLIUM dataset.
Args:
root (str): Path containing dataset or target path where its downloaded if needed
release (str, optional): TEDLIUM identifier (release1,release2,release3). Defaults to RELEASE.
subset (str, optional): train/dev/test for releases 1&2, None for release3. Defaults to Train/None
download (bool, optional): Download dataset in case is not founded in root path. Defaults to False.
audio_ext (str, optional): Overwrite audio extension when loading items. Defaults to ".sph".
Raises:
RuntimeError: If release identifier does not match any supported release,
"""
self._ext_audio = audio_ext
if release in _RELEASE_CONFIGS.keys():
folder_in_archive = _RELEASE_CONFIGS[release]["folder_in_archive"]
......@@ -140,7 +117,7 @@ class TEDLIUM(Dataset):
path (str): Dataset root path
Returns:
Tedlium_item: A namedTuple containing [waveform, sample_rate, transcript, talk_id, speaker_id, identifier]
tuple: ``(waveform, sample_rate, transcript, talk_id, speaker_id, identifier)``
"""
transcript_path = os.path.join(path, "stm", fileid)
with open(transcript_path + ".stm") as f:
......@@ -171,14 +148,13 @@ class TEDLIUM(Dataset):
return torchaudio.load(path)[:, start_time:end_time]
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]:
"""TEDLIUM dataset custom function overwritting default loadbehaviour
Loads a TEDLIUM sample given a index N.
"""Load the n-th sample from the dataset.
Args:
n (int): Index of sample to be loaded
n (int): The index of the sample to be loaded
Returns:
Tedlium_item: A namedTuple containing [waveform, sample_rate, transcript, talk_id, speaker_id, identifier]
tuple: ``(waveform, sample_rate, transcript, talk_id, speaker_id, identifier)``
"""
fileid, line = self._filelist[n]
return self._load_tedlium_item(fileid, line, self._path)
......@@ -193,10 +169,8 @@ class TEDLIUM(Dataset):
@property
def phoneme_dict(self):
"""Returns the phoneme dictionary of a TEDLIUM release.
Returns:
dictionary: Phoneme dictionary for the current tedlium release
"""dict[str, tuple[str]]: Phonemes. Mapping from word to tuple of phonemes.
Note that some words have empty phonemes.
"""
# Read phoneme dictionary
if not self._phoneme_dict:
......
......@@ -54,12 +54,25 @@ def load_vctk_item(fileid: str,
class VCTK(Dataset):
"""
Create a Dataset for VCTK. Each item is a tuple of the form:
(waveform, sample_rate, utterance, speaker_id, utterance_id)
"""Create a Dataset for VCTK.
Note:
* **This dataset is no longer publicly available.** Please use :py:class:`VCTK_092`
* Directory ``p315`` is ignored because there are no corresponding text files.
For more information about the dataset visit: https://datashare.is.ed.ac.uk/handle/10283/3443
Folder `p315` will be ignored due to the non-existent corresponding text files.
For more information about the dataset visit: https://datashare.is.ed.ac.uk/handle/10283/3443
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): Not used as the dataset is no longer publicly available.
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"VCTK-Corpus"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
Giving ``download=True`` will result in error as the dataset is no longer
publicly available.
downsample (bool, optional): Not used.
transform (callable, optional): Optional transform applied on waveform. (default: ``None``)
target_transform (callable, optional): Optional transform applied on utterance. (default: ``None``)
"""
_folder_txt = "txt"
......@@ -118,6 +131,14 @@ class VCTK(Dataset):
self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, speaker_id, utterance_id)``
"""
fileid = self._walker[n]
item = load_vctk_item(
fileid,
......@@ -145,14 +166,13 @@ class VCTK(Dataset):
class VCTK_092(Dataset):
"""Create VCTK 0.92 Dataset
An item is a ``namedtuple`` of (``waveform``, ``sample_rate``, ``utterance``,
``speaker_id``, ``utterance_id``)
Args:
root (str): Root directory where the dataset's top level directory is found.
mic_id (str): Microphone ID. Either ``"mic1"`` or ``"mic2"``
download (bool, optional): Download the dataset if not found in the given directory.
url (str, optional): URL from which the dataset is downloaded.
mic_id (str): Microphone ID. Either ``"mic1"`` or ``"mic2"``. (default: ``"mic2"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
url (str, optional): The URL to download the dataset from.
(default: ``"https://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip"``)
audio_ext (str, optional): Custom audio extension if dataset is converted to non-default audio format.
Note:
......@@ -252,6 +272,14 @@ class VCTK_092(Dataset):
return Sample(waveform, sample_rate, utterance, speaker_id, utterance_id)
def __getitem__(self, n: int) -> Sample:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, speaker_id, utterance_id)``
"""
speaker_id, utterance_id = self._sample_ids[n]
return self._load_sample(speaker_id, utterance_id, self._mic_id)
......
......@@ -31,9 +31,18 @@ def load_yesno_item(fileid: str, path: str, ext_audio: str) -> Tuple[Tensor, int
class YESNO(Dataset):
"""
Create a Dataset for YesNo. Each item is a tuple of the form:
(waveform, sample_rate, labels)
"""Create a Dataset for YesNo.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from.
(default: ``"http://www.openslr.org/resources/1/waves_yesno.tar.gz"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"waves_yesno"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
transform (callable, optional): Optional transform applied on waveform. (default: ``None``)
target_transform (callable, optional): Optional transform applied on utterance. (default: ``None``)
"""
_ext_audio = ".wav"
......@@ -78,6 +87,14 @@ class YESNO(Dataset):
self._walker = list(walker)
def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, labels)``
"""
fileid = self._walker[n]
item = load_yesno_item(fileid, self._path, self._ext_audio)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment