Standardize optional types in docstrings (#1746)

768432c3 · Caroline Chen · GitHub · d9bfb708 · 768432c3 · 768432c3
Unverified Commit 768432c3 authored Sep 02, 2021 by Caroline Chen Committed by GitHub Sep 02, 2021
20 changed files
--- a/examples/pipeline_tacotron2/datasets.py
+++ b/examples/pipeline_tacotron2/datasets.py
@@ -131,7 +131,7 @@ def text_mel_collate_fn(batch: Tuple[Tensor, Tensor],
    Args:
        batch (tuple of two tensors): the first tensor is the mel spectrogram with shape
            (n_batch, n_mels, n_frames), the second tensor is the text with shape (n_batch, ).
-        n_frames_per_step (int): The number of frames to advance every step.
+        n_frames_per_step (int, optional): The number of frames to advance every step.
    Returns:
        text_padded (Tensor): The input text to Tacotron2 with shape (n_batch, max of ``text_lengths``).

--- a/examples/pipeline_tacotron2/text/text_preprocessing.py
+++ b/examples/pipeline_tacotron2/text/text_preprocessing.py
@@ -123,12 +123,12 @@ def text_to_sequence(sent: str,
        symbol_list (str or List of string, optional): When the input is a string, available options include
            "english_characters" and "english_phonemes". When the input is a list of string, ``symbol_list`` will
            directly be used as the symbol to encode. (Default: "english_characters")
-        phonemizer (str, optional): The phonemizer to use. Only used when ``symbol_list`` is "english_phonemes".
+        phonemizer (str or None, optional): The phonemizer to use. Only used when ``symbol_list`` is "english_phonemes".
            Available options include "DeepPhonemizer". (Default: "DeepPhonemizer")
-        checkpoint (str, optional): The path to the checkpoint of the phonemizer. Only used when ``symbol_list`` is
+        checkpoint (str or None, optional): The path to the checkpoint of the phonemizer. Only used when
-            "english_phonemes". (Default: "./en_us_cmudict_forward.pt")
+            ``symbol_list`` is "english_phonemes". (Default: "./en_us_cmudict_forward.pt")
-        cmudict_root (str, optional): The path to the directory where the CMUDict dataset is found or downloaded.
+        cmudict_root (str or None, optional): The path to the directory where the CMUDict dataset is found or
-            Only used when ``symbol_list`` is "english_phonemes". (Default: "./")
+            downloaded. Only used when ``symbol_list`` is "english_phonemes". (Default: "./")
    Returns:
        List of integers corresponding to the symbols in the sentence.

--- a/examples/pipeline_wavernn/wavernn_inference_wrapper.py
+++ b/examples/pipeline_wavernn/wavernn_inference_wrapper.py
@@ -171,13 +171,13 @@ class WaveRNNInferenceWrapper(torch.nn.Module):
        Args:
            specgram (Tensor): spectrogram of size (n_mels, n_time)
-            mulaw (bool): Whether to perform mulaw decoding (Default: ``True``).
+            mulaw (bool, optional): Whether to perform mulaw decoding (Default: ``True``).
-            batched (bool): Whether to perform batch prediction. Using batch prediction
+            batched (bool, optional): Whether to perform batch prediction. Using batch prediction
                will significantly increase the inference speed (Default: ``True``).
-            timesteps (int): The time steps for each batch. Only used when `batched`
+            timesteps (int, optional): The time steps for each batch. Only used when `batched`
                is set to True (Default: ``100``).
-            overlap (int): The overlapping time steps between batches. Only used when `batched`
+            overlap (int, optional): The overlapping time steps between batches. Only used when
-                is set to True (Default: ``5``).
+                `batched` is set to True (Default: ``5``).
        Returns:
            waveform (Tensor): Reconstructed waveform of size (1, n_time, ).

--- a/examples/source_separation/utils/dataset/wsj0mix.py
+++ b/examples/source_separation/utils/dataset/wsj0mix.py
@@ -19,7 +19,7 @@ class WSJ0Mix(Dataset):
            N source audios.
        sample_rate (int): Expected sample rate of audio files. If any of the audio has a
            different sample rate, raises ``ValueError``.
-        audio_ext (str): The extension of audio files to find. (default: ".wav")
+        audio_ext (str, optional): The extension of audio files to find. (default: ".wav")
    """
    def __init__(
        self,

--- a/examples/source_separation/utils/metrics.py
+++ b/examples/source_separation/utils/metrics.py
@@ -21,9 +21,9 @@ def sdr(
            Shape: [batch, speakers (can be 1), time frame]
        reference (torch.Tensor): Reference signal.
            Shape: [batch, speakers, time frame]
-        mask (Optional[torch.Tensor]): Binary mask to indicate padded value (0) or valid value (1).
+        mask (torch.Tensor or None, optional): Binary mask to indicate padded value (0) or valid value (1).
            Shape: [batch, 1, time frame]
-        epsilon (float): constant value used to stabilize division.
+        epsilon (float, optional): constant value used to stabilize division.
    Returns:
        torch.Tensor: scale-invariant source-to-distortion ratio.
@@ -99,9 +99,9 @@ class PIT(torch.nn.Module):
                Shape: [batch, speakers, time frame]
            reference (torch.Tensor): Reference (original) source signals.
                Shape: [batch, speakers, time frame]
-            mask (Optional[torch.Tensor]): Binary mask to indicate padded value (0) or valid value (1).
+            mask (torch.Tensor or None, optional): Binary mask to indicate padded value (0) or valid value (1).
                Shape: [batch, 1, time frame]
-            epsilon (float): constant value used to stabilize division.
+            epsilon (float, optional): constant value used to stabilize division.
        Returns:
            torch.Tensor: Maximum criterion over the speaker permutation.
@@ -140,9 +140,9 @@ def sdr_pit(
            Shape: [batch, speakers (can be 1), time frame]
        reference (torch.Tensor): Reference signal.
            Shape: [batch, speakers, time frame]
-        mask (Optional[torch.Tensor]): Binary mask to indicate padded value (0) or valid value (1).
+        mask (torch.Tensor or None, optional): Binary mask to indicate padded value (0) or valid value (1).
            Shape: [batch, 1, time frame]
-        epsilon (float): constant value used to stabilize division.
+        epsilon (float, optional): constant value used to stabilize division.
    Returns:
        torch.Tensor: scale-invariant source-to-distortion ratio.
@@ -187,9 +187,9 @@ def sdri(
            Shape: [batch, speakers, time frame]
        mix (torch.Tensor): Mixed source signals, from which the estimated signals were generated.
            Shape: [batch, speakers == 1, time frame]
-        mask (Optional[torch.Tensor]): Binary mask to indicate padded value (0) or valid value (1).
+        mask (torch.Tensor or None, optional): Binary mask to indicate padded value (0) or valid value (1).
            Shape: [batch, 1, time frame]
-        epsilon (float): constant value used to stabilize division.
+        epsilon (float, optional): constant value used to stabilize division.
    Returns:
        torch.Tensor: Improved SDR. Shape: [batch, ]

--- a/torchaudio/backend/soundfile_backend.py
+++ b/torchaudio/backend/soundfile_backend.py
@@ -92,7 +92,7 @@ def info(filepath: str, format: Optional[str] = None) -> AudioMetaData:
    Args:
        filepath (path-like object or file-like object):
            Source of audio data.
-        format (str, optional):
+        format (str or None, optional):
            Not used. PySoundFile does not accept format hint.
    Returns:
@@ -168,23 +168,23 @@ def load(
    Args:
        filepath (path-like object or file-like object):
            Source of audio data.
-        frame_offset (int):
+        frame_offset (int, optional):
            Number of frames to skip before start reading data.
-        num_frames (int):
+        num_frames (int, optional):
            Maximum number of frames to read. ``-1`` reads all the remaining samples,
            starting from ``frame_offset``.
            This function may return fewer frames if there are not enough
            frames in the given file.
-        normalize (bool):
+        normalize (bool, optional):
            When ``True``, this function always return ``float32``, and sample values are
            normalized to ``[-1.0, 1.0]``.
            If input file is integer WAV, giving ``False`` will change the resulting Tensor type to
            integer type.
            This argument has no effect for formats other than integer WAV type.
-        channels_first (bool):
+        channels_first (bool, optional):
            When True, the returned Tensor has dimension ``[channel, time]``.
            Otherwise, the returned Tensor's dimension is ``[time, channel]``.
-        format (str, optional):
+        format (str or None, optional):
            Not used. PySoundFile does not accept format hint.
    Returns:
@@ -335,11 +335,11 @@ def save(
        filepath (str or pathlib.Path): Path to audio file.
        src (torch.Tensor): Audio data to save. must be 2D tensor.
        sample_rate (int): sampling rate
-        channels_first (bool): If ``True``, the given tensor is interpreted as ``[channel, time]``,
+        channels_first (bool, optional): If ``True``, the given tensor is interpreted as ``[channel, time]``,
            otherwise ``[time, channel]``.
-        compression (Optional[float]): Not used.
+        compression (float or None, optional): Not used.
            It is here only for interface compatibility with the "sox_io" backend.
-        format (str, optional): Override the audio format.
+        format (str or None, optional): Override the audio format.
            When ``filepath`` argument is path-like object, audio format is
            inferred from file extension. If the file extension is missing or
            different, you can specify the correct format with this argument.
@@ -349,7 +349,7 @@ def save(
            Valid values are ``"wav"``, ``"ogg"``, ``"vorbis"``,
            ``"flac"`` and ``"sph"``.
-        encoding (str, optional): Changes the encoding for supported formats.
+        encoding (str or None, optional): Changes the encoding for supported formats.
            This argument is effective only for supported formats, such as
            ``"wav"``, ``"flac"`` and ``"sph"``. Valid values are:
@@ -359,7 +359,7 @@ def save(
                - ``"ULAW"`` (mu-law)
                - ``"ALAW"`` (a-law)
-        bits_per_sample (int, optional): Changes the bit depth for the
+        bits_per_sample (int or None, optional): Changes the bit depth for the
            supported formats.
            When ``format`` is one of ``"wav"``, ``"flac"`` or ``"sph"``,
            you can change the bit depth.

--- a/torchaudio/backend/sox_io_backend.py
+++ b/torchaudio/backend/sox_io_backend.py
@@ -37,7 +37,7 @@ def info(
                  * This argument is intentionally annotated as ``str`` only due to
                    TorchScript compiler compatibility.
-        format (str, optional):
+        format (str or None, optional):
            Override the format detection with the given format.
            Providing the argument might help when libsox can not infer the format
            from header or extension,
@@ -119,21 +119,21 @@ def load(
            TorchScript compiler compatibility.
        frame_offset (int):
            Number of frames to skip before start reading data.
-        num_frames (int):
+        num_frames (int, optional):
            Maximum number of frames to read. ``-1`` reads all the remaining samples,
            starting from ``frame_offset``.
            This function may return fewer frames if there are not enough
            frames in the given file.
-        normalize (bool):
+        normalize (bool, optional):
            When ``True``, this function always return ``float32``, and sample values are
            normalized to ``[-1.0, 1.0]``.
            If input file is integer WAV, giving ``False`` will change the resulting Tensor type to
            integer type.
            This argument has no effect for formats other than integer WAV type.
-        channels_first (bool):
+        channels_first (bool, optional):
            When True, the returned Tensor has dimension ``[channel, time]``.
            Otherwise, the returned Tensor's dimension is ``[time, channel]``.
-        format (str, optional):
+        format (str or None, optional):
            Override the format detection with the given format.
            Providing the argument might help when libsox can not infer the format
            from header or extension,
@@ -172,9 +172,9 @@ def save(
            as ``str`` for TorchScript compiler compatibility.
        src (torch.Tensor): Audio data to save. must be 2D tensor.
        sample_rate (int): sampling rate
-        channels_first (bool): If ``True``, the given tensor is interpreted as ``[channel, time]``,
+        channels_first (bool, optional): If ``True``, the given tensor is interpreted as ``[channel, time]``,
            otherwise ``[time, channel]``.
-        compression (Optional[float]): Used for formats other than WAV.
+        compression (float or None, optional): Used for formats other than WAV.
            This corresponds to ``-C`` option of ``sox`` command.
            ``"mp3"``
@@ -189,7 +189,7 @@ def save(
                and lowest quality. Default: ``3``.
            See the detail at http://sox.sourceforge.net/soxformat.html.
-        format (str, optional): Override the audio format.
+        format (str or None, optional): Override the audio format.
            When ``filepath`` argument is path-like object, audio format is inferred from
            file extension. If file extension is missing or different, you can specify the
            correct format with this argument.
@@ -199,7 +199,7 @@ def save(
            Valid values are ``"wav"``, ``"mp3"``, ``"ogg"``, ``"vorbis"``, ``"amr-nb"``,
            ``"amb"``, ``"flac"``, ``"sph"``, ``"gsm"``, and ``"htk"``.
-        encoding (str, optional): Changes the encoding for the supported formats.
+        encoding (str or None, optional): Changes the encoding for the supported formats.
            This argument is effective only for supported formats, such as ``"wav"``, ``"amb"``
            and ``"sph"``. Valid values are:
@@ -225,7 +225,7 @@ def save(
                ``"sph"`` format;
                    - the default value is ``"PCM_S"``
-        bits_per_sample (int, optional): Changes the bit depth for the supported formats.
+        bits_per_sample (int or None, optional): Changes the bit depth for the supported formats.
            When ``format`` is one of ``"wav"``, ``"flac"``, ``"sph"``, or ``"amb"``, you can change the
            bit depth. Valid values are ``8``, ``16``, ``32`` and ``64``.

--- a/torchaudio/backend/utils.py
+++ b/torchaudio/backend/utils.py
@@ -35,7 +35,7 @@ def set_audio_backend(backend: Optional[str]):
    """Set the backend for I/O operation
    Args:
-        backend (Optional[str]): Name of the backend.
+        backend (str or None): Name of the backend.
            One of ``"sox_io"`` or ``"soundfile"`` based on availability
            of the system. If ``None`` is provided, the current backend is unassigned.
    """

--- a/torchaudio/datasets/gtzan.py
+++ b/torchaudio/datasets/gtzan.py
@@ -1011,7 +1011,7 @@ class GTZAN(Dataset):
        folder_in_archive (str, optional): The top-level directory of the dataset.
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
-        subset (str, optional): Which subset of the dataset to use.
+        subset (str or None, optional): Which subset of the dataset to use.
            One of ``"training"``, ``"validation"``, ``"testing"`` or ``None``.
            If ``None``, the entire dataset is used. (default: ``None``).
    """

--- a/torchaudio/datasets/speechcommands.py
+++ b/torchaudio/datasets/speechcommands.py
@@ -65,7 +65,7 @@ class SPEECHCOMMANDS(Dataset):
            The top-level directory of the dataset. (default: ``"SpeechCommands"``)
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
-        subset (Optional[str]):
+        subset (str or None, optional):
            Select a subset of the dataset [None, "training", "validation", "testing"]. None means
            the whole dataset. "validation" and "testing" are defined in "validation_list.txt" and
            "testing_list.txt", respectively, and "training" is the rest. Details for the files

--- a/torchaudio/datasets/tedlium.py
+++ b/torchaudio/datasets/tedlium.py
@@ -55,6 +55,7 @@ class TEDLIUM(Dataset):
            and ``"test"`` for releases 1&2, ``None`` for release3. Defaults to ``"train"`` or ``None``.
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
+        audio_ext (str, optional): extension for audio file (default: ``".sph"``)
    """
    def __init__(
        self,
@@ -62,7 +63,7 @@ class TEDLIUM(Dataset):
        release: str = "release1",
        subset: str = None,
        download: bool = False,
-        audio_ext=".sph"
+        audio_ext: str = ".sph"
    ) -> None:
        self._ext_audio = audio_ext
        if release in _RELEASE_CONFIGS.keys():
@@ -144,8 +145,9 @@ class TEDLIUM(Dataset):
        Args:
            path (str): Path to audio file
-            start_time (int, optional): Time in seconds where the sample sentence stars
+            start_time (int): Time in seconds where the sample sentence starts
-            end_time (int, optional): Time in seconds where the sample sentence finishes
+            end_time (int): Time in seconds where the sample sentence finishes
+            sample_rate (float, optional): Sampling rate
        Returns:
            [Tensor, int]: Audio tensor representation and sample rate

--- a/torchaudio/datasets/utils.py
+++ b/torchaudio/datasets/utils.py
@@ -22,7 +22,7 @@ def stream_url(url: str,
    Args:
        url (str): Url.
-        start_byte (int, optional): Start streaming at that point (Default: ``None``).
+        start_byte (int or None, optional): Start streaming at that point (Default: ``None``).
        block_size (int, optional): Size of chunks to stream (Default: ``32 * 1024``).
        progress_bar (bool, optional): Display a progress bar (Default: ``True``).
    """
@@ -68,8 +68,9 @@ def download_url(url: str,
    Args:
        url (str): Url.
        download_folder (str): Folder to download file.
-        filename (str, optional): Name of downloaded file. If None, it is inferred from the url (Default: ``None``).
+        filename (str or None, optional): Name of downloaded file. If None, it is inferred from the url
-        hash_value (str, optional): Hash for url (Default: ``None``).
+            (Default: ``None``).
+        hash_value (str or None, optional): Hash for url (Default: ``None``).
        hash_type (str, optional): Hash type, among "sha256" and "md5" (Default: ``"sha256"``).
        progress_bar (bool, optional): Display a progress bar (Default: ``True``).
        resume (bool, optional): Enable resuming download (Default: ``False``).
@@ -149,7 +150,8 @@ def extract_archive(from_path: str, to_path: Optional[str] = None, overwrite: bo
    """Extract archive.
    Args:
        from_path (str): the path of the archive.
-        to_path (str, optional): the root path of the extraced files (directory of from_path) (Default: ``None``)
+        to_path (str or None, optional): the root path of the extracted files (directory of from_path)
+            (Default: ``None``)
        overwrite (bool, optional): overwrite existing files (Default: ``False``)
    Returns:

--- a/torchaudio/datasets/vctk.py
+++ b/torchaudio/datasets/vctk.py
@@ -150,7 +150,7 @@ class VCTK_092(Dataset):
    Args:
        root (str): Root directory where the dataset's top level directory is found.
-        mic_id (str): Microphone ID. Either ``"mic1"`` or ``"mic2"``. (default: ``"mic2"``)
+        mic_id (str, optional): Microphone ID. Either ``"mic1"`` or ``"mic2"``. (default: ``"mic2"``)
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
        url (str, optional): The URL to download the dataset from.

--- a/torchaudio/functional/filtering.py
+++ b/torchaudio/functional/filtering.py
@@ -316,7 +316,7 @@ def contrast(waveform: Tensor, enhancement_amount: float = 75.0) -> Tensor:
    Args:
        waveform (Tensor): audio waveform of dimension of `(..., time)`
-        enhancement_amount (float): controls the amount of the enhancement
+        enhancement_amount (float, optional): controls the amount of the enhancement
            Allowed range of values for enhancement_amount : 0-100
            Note that enhancement_amount = 0 still gives a significant contrast enhancement
@@ -350,7 +350,7 @@ def dcshift(
        waveform (Tensor): audio waveform of dimension of `(..., time)`
        shift (float): indicates the amount to shift the audio
            Allowed range of values for shift : -2.0 to +2.0
-        limiter_gain (float): It is used only on peaks to prevent clipping
+        limiter_gain (float or None, optional): It is used only on peaks to prevent clipping
            It should have a value much less than 1 (e.g. 0.05 or 0.02)
    Returns:
@@ -690,20 +690,21 @@ def flanger(
        waveform (Tensor): audio waveform of dimension of `(..., channel, time)` .
            Max 4 channels allowed
        sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz)
-        delay (float): desired delay in milliseconds(ms)
+        delay (float, optional): desired delay in milliseconds(ms)
            Allowed range of values are 0 to 30
-        depth (float): desired delay depth in milliseconds(ms)
+        depth (float, optional): desired delay depth in milliseconds(ms)
            Allowed range of values are 0 to 10
-        regen (float): desired regen(feedback gain) in dB
+        regen (float, optional): desired regen(feedback gain) in dB
            Allowed range of values are -95 to 95
-        width (float):  desired width(delay gain) in dB
+        width (float, optional):  desired width(delay gain) in dB
            Allowed range of values are 0 to 100
-        speed (float):  modulation speed in Hz
+        speed (float, optional):  modulation speed in Hz
            Allowed range of values are 0.1 to 10
-        phase (float):  percentage phase-shift for multi-channel
+        phase (float, optional):  percentage phase-shift for multi-channel
            Allowed range of values are 0 to 100
-        modulation (str):  Use either "sinusoidal" or "triangular" modulation. (Default: ``sinusoidal``)
+        modulation (str, optional):  Use either "sinusoidal" or "triangular" modulation. (Default: ``sinusoidal``)
-        interpolation (str): Use either "linear" or "quadratic" for delay-line interpolation. (Default: ``linear``)
+        interpolation (str, optional): Use either "linear" or "quadratic" for delay-line interpolation.
+            (Default: ``linear``)
    Returns:
        Tensor: Waveform of dimension of `(..., channel, time)`
@@ -1072,9 +1073,9 @@ def overdrive(waveform: Tensor, gain: float = 20, colour: float = 20) -> Tensor:
    Args:
        waveform (Tensor): audio waveform of dimension of `(..., time)`
-        gain (float): desired gain at the boost (or attenuation) in dB
+        gain (float, optional): desired gain at the boost (or attenuation) in dB
            Allowed range of values are 0 to 100
-        colour (float):  controls the amount of even harmonic content in the over-driven output
+        colour (float, optional):  controls the amount of even harmonic content in the over-driven output
            Allowed range of values are 0 to 100
    Returns:
@@ -1132,17 +1133,17 @@ def phaser(
    Args:
        waveform (Tensor): audio waveform of dimension of `(..., time)`
        sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz)
-        gain_in (float): desired input gain at the boost (or attenuation) in dB
+        gain_in (float, optional): desired input gain at the boost (or attenuation) in dB
            Allowed range of values are 0 to 1
-        gain_out (float): desired output gain at the boost (or attenuation) in dB
+        gain_out (float, optional): desired output gain at the boost (or attenuation) in dB
            Allowed range of values are 0 to 1e9
-        delay_ms (float): desired delay in milliseconds
+        delay_ms (float, optional): desired delay in milliseconds
            Allowed range of values are 0 to 5.0
-        decay (float):  desired decay relative to gain-in
+        decay (float, optional):  desired decay relative to gain-in
            Allowed range of values are 0 to 0.99
-        mod_speed (float):  modulation speed in Hz
+        mod_speed (float, optional):  modulation speed in Hz
            Allowed range of values are 0.1 to 2
-        sinusoidal (bool):  If ``True``, uses sinusoidal modulation (preferable for multiple instruments)
+        sinusoidal (bool, optional):  If ``True``, uses sinusoidal modulation (preferable for multiple instruments)
            If ``False``, uses triangular modulation (gives single instruments a sharper phasing effect)
            (Default: ``True``)

--- a/torchaudio/functional/functional.py
+++ b/torchaudio/functional/functional.py
@@ -155,7 +155,7 @@ def inverse_spectrogram(
    Args:
        spectrogram (Tensor): Complex tensor of audio of dimension (..., freq, time).
-        length (int, optional): The output length of the waveform.
+        length (int or None): The output length of the waveform.
        pad (int): Two sided padding of signal. It is only effective when ``length`` is provided.
        window (Tensor): Window tensor that is applied/multiplied to each frame/window
        n_fft (int): Size of FFT
@@ -503,8 +503,8 @@ def create_fb_matrix(
        f_max (float): Maximum frequency (Hz)
        n_mels (int): Number of mel filterbanks
        sample_rate (int): Sample rate of the audio waveform
-        norm (Optional[str]): If 'slaney', divide the triangular mel weights by the width of the mel band
+        norm (str or None, optional): If 'slaney', divide the triangular mel weights by the width of the mel band
-        (area normalization). (Default: ``None``)
+            (area normalization). (Default: ``None``)
        mel_scale (str, optional): Scale to use: ``htk`` or ``slaney``. (Default: ``htk``)
    Returns:
@@ -549,8 +549,8 @@ def melscale_fbanks(
        f_max (float): Maximum frequency (Hz)
        n_mels (int): Number of mel filterbanks
        sample_rate (int): Sample rate of the audio waveform
-        norm (Optional[str]): If 'slaney', divide the triangular mel weights by the width of the mel band
+        norm (str or None, optional): If 'slaney', divide the triangular mel weights by the width of the mel band
-        (area normalization). (Default: ``None``)
+            (area normalization). (Default: ``None``)
        mel_scale (str, optional): Scale to use: ``htk`` or ``slaney``. (Default: ``htk``)
    Returns:
@@ -724,7 +724,7 @@ def complex_norm(
    Args:
        complex_tensor (Tensor): Tensor shape of `(..., complex=2)`
-        power (float): Power of the norm. (Default: `1.0`).
+        power (float, optional): Power of the norm. (Default: `1.0`).
    Returns:
        Tensor: Power of the normed input tensor. Shape of `(..., )`
@@ -771,7 +771,7 @@ def magphase(
    Args:
        complex_tensor (Tensor): Tensor shape of `(..., complex=2)`
-        power (float): Power of the norm. (Default: `1.0`)
+        power (float, optional): Power of the norm. (Default: `1.0`)
    Returns:
        (Tensor, Tensor): The magnitude and phase of the complex tensor
@@ -1343,14 +1343,14 @@ def apply_codec(
        waveform (Tensor): Audio data. Must be 2 dimensional. See also ```channels_first```.
        sample_rate (int): Sample rate of the audio waveform.
        format (str): File format.
-        channels_first (bool):
+        channels_first (bool, optional):
            When True, both the input and output Tensor have dimension ``[channel, time]``.
            Otherwise, they have dimension ``[time, channel]``.
-        compression (float): Used for formats other than WAV.
+        compression (float or None, optional): Used for formats other than WAV.
            For more details see :py:func:`torchaudio.backend.sox_io_backend.save`.
-        encoding (str, optional): Changes the encoding for the supported formats.
+        encoding (str or None, optional): Changes the encoding for the supported formats.
            For more details see :py:func:`torchaudio.backend.sox_io_backend.save`.
-        bits_per_sample (int, optional): Changes the bit depth for the supported formats.
+        bits_per_sample (int or None, optional): Changes the bit depth for the supported formats.
            For more details see :py:func:`torchaudio.backend.sox_io_backend.save`.
    Returns:
@@ -1614,7 +1614,7 @@ def resample(
            Lower values reduce anti-aliasing, but also reduce some of the highest frequencies. (Default: ``0.99``)
        resampling_method (str, optional): The resampling method to use.
            Options: [``sinc_interpolation``, ``kaiser_window``] (Default: ``'sinc_interpolation'``)
-        beta (float or None): The shape parameter used for kaiser window.
+        beta (float or None, optional): The shape parameter used for kaiser window.
    Returns:
        Tensor: The waveform at the new frequency of dimension (..., time).

--- a/torchaudio/models/conv_tasnet.py
+++ b/torchaudio/models/conv_tasnet.py
@@ -16,8 +16,8 @@ class ConvBlock(torch.nn.Module):
        hidden_channels (int): The number of channels in the internal layers, <H>.
        kernel_size (int): The convolution kernel size of the middle layer, <P>.
        padding (int): Padding value of the convolution in the middle layer.
-        dilation (int): Dilation value of the convolution in the middle layer.
+        dilation (int, optional): Dilation value of the convolution in the middle layer.
-        no_redisual (bool): Disable residual block/output.
+        no_redisual (bool, optional): Disable residual block/output.
    Note:
        This implementation corresponds to the "non-causal" setting in the paper.
@@ -169,14 +169,14 @@ class ConvTasNet(torch.nn.Module):
    [:footcite:`Luo_2019`].
    Args:
-        num_sources (int): The number of sources to split.
+        num_sources (int, optional): The number of sources to split.
-        enc_kernel_size (int): The convolution kernel size of the encoder/decoder, <L>.
+        enc_kernel_size (int, optional): The convolution kernel size of the encoder/decoder, <L>.
-        enc_num_feats (int): The feature dimensions passed to mask generator, <N>.
+        enc_num_feats (int, optional): The feature dimensions passed to mask generator, <N>.
-        msk_kernel_size (int): The convolution kernel size of the mask generator, <P>.
+        msk_kernel_size (int, optional): The convolution kernel size of the mask generator, <P>.
-        msk_num_feats (int): The input/output feature dimension of conv block in the mask generator, <B, Sc>.
+        msk_num_feats (int, optional): The input/output feature dimension of conv block in the mask generator, <B, Sc>.
-        msk_num_hidden_feats (int): The internal feature dimension of conv block of the mask generator, <H>.
+        msk_num_hidden_feats (int, optional): The internal feature dimension of conv block of the mask generator, <H>.
-        msk_num_layers (int): The number of layers in one conv block of the mask generator, <X>.
+        msk_num_layers (int, optional): The number of layers in one conv block of the mask generator, <X>.
-        msk_num_stacks (int): The numbr of conv blocks of the mask generator, <R>.
+        msk_num_stacks (int, optional): The number of conv blocks of the mask generator, <R>.
    Note:
        This implementation corresponds to the "non-causal" setting in the paper.

--- a/torchaudio/models/wav2vec2/components.py
+++ b/torchaudio/models/wav2vec2/components.py
@@ -49,7 +49,7 @@ class ConvLayerBlock(Module):
        """
        Args:
            x (Tensor): Shape: ``[batch, in_channels, in_frame]``.
-            length (Tensor, optional): Shape ``[batch, ]``.
+            length (Tensor or None, optional): Shape ``[batch, ]``.
        Returns:
            Tensor: Shape ``[batch, out_channels, out_frames]``.
            Optional[Tensor]: Shape ``[batch, ]``.
@@ -90,7 +90,7 @@ class FeatureExtractor(Module):
            x (Tensor):
                Input Tensor representing a batch of audio,
                shape: ``[batch, time]``.
-            length (Tensor, optional):
+            length (Tensor or None, optional):
                Valid length of each input sample. shape: ``[batch, ]``.
        Returns:
@@ -243,7 +243,7 @@ class SelfAttention(Module):
        """
        Args:
            x (Tensor): shape: ``[batch_size, sequence_length, embed_dim]``.
-            attention_mask (Tensor, optional):
+            attention_mask (Tensor or None, optional):
                shape: ``[batch_size, 1, sequence_length, sequence_length]``
        Returns:
@@ -340,7 +340,7 @@ class EncoderLayer(Module):
        """
        Args:
            x (Tensor): shape: ``(batch, sequence_length, embed_dim)``
-            attention_mask (Tensor, optional):
+            attention_mask (Tensor or None, optional):
                shape: ``(batch, 1, sequence_length, sequence_length)``
        """
        residual = x

--- a/torchaudio/models/wav2vec2/model.py
+++ b/torchaudio/models/wav2vec2/model.py
@@ -38,7 +38,7 @@ class Wav2Vec2Model(Module):
        Args:
            waveforms (Tensor): Audio tensor of shape ``(batch, frames)``.
-            lengths (Tensor, optional):
+            lengths (Tensor or None, optional):
                Indicates the valid length of each audio sample in the batch.
                Shape: ``(batch, )``.
@@ -62,7 +62,7 @@ class Wav2Vec2Model(Module):
        Args:
            waveforms (Tensor): Audio tensor of shape ``(batch, frames)``.
-            lengths (Tensor, optional):
+            lengths (Tensor or None, optional):
                Indicates the valid length of each audio sample in the batch.
                Shape: ``(batch, )``.

--- a/torchaudio/models/wav2vec2/utils/import_fairseq.py
+++ b/torchaudio/models/wav2vec2/utils/import_fairseq.py
@@ -133,7 +133,7 @@ def import_fairseq_model(
            An instance of fairseq's Wav2Vec2.0 model class.
            Either ``fairseq.models.wav2vec.wav2vec2_asr.Wav2VecEncoder`` or
            ``fairseq.models.wav2vec.wav2vec2.Wav2Vec2Model``.
-        num_out (int, optional):
+        num_out (int or None, optional):
            The number of output labels. Required only when the original model is
            an instance of ``fairseq.models.wav2vec.wav2vec2.Wav2Vec2Model``.

--- a/torchaudio/sox_effects/sox_effects.py
+++ b/torchaudio/sox_effects/sox_effects.py
@@ -72,7 +72,7 @@ def apply_effects_tensor(
        tensor (torch.Tensor): Input 2D CPU Tensor.
        sample_rate (int): Sample rate
        effects (List[List[str]]): List of effects.
-        channels_first (bool): Indicates if the input Tensor's dimension is
+        channels_first (bool, optional): Indicates if the input Tensor's dimension is
            ``[channels, time]`` or ``[time, channels]``
    Returns:
@@ -185,15 +185,15 @@ def apply_effects_file(
            Note: This argument is intentionally annotated as ``str`` only for
            TorchScript compiler compatibility.
        effects (List[List[str]]): List of effects.
-        normalize (bool):
+        normalize (bool, optional):
            When ``True``, this function always return ``float32``, and sample values are
            normalized to ``[-1.0, 1.0]``.
            If input file is integer WAV, giving ``False`` will change the resulting Tensor type to
            integer type. This argument has no effect for formats other
            than integer WAV type.
-        channels_first (bool): When True, the returned Tensor has dimension ``[channel, time]``.
+        channels_first (bool, optional): When True, the returned Tensor has dimension ``[channel, time]``.
            Otherwise, the returned Tensor's dimension is ``[time, channel]``.
-        format (str, optional):
+        format (str or None, optional):
            Override the format detection with the given format.
            Providing the argument might help when libsox can not infer the format
            from header or extension,