Commit 8a0d7b36 authored by moto, committed by Facebook GitHub Bot

Tweak documentation (#2656)

Summary:
1. Override the `__module__` attribute of re-exported classes in `conf.py` so that no manual signature override is necessary in docstrings
2. Fix how the `SourceSeparationBundle` member attribute `sample_rate` is documented

Pull Request resolved: https://github.com/pytorch/audio/pull/2656

Reviewed By: carolineechen

Differential Revision: D39293053

Pulled By: mthrok

fbshipit-source-id: 2b8d6be1aee517d0e692043c26ac2438a787adc6
parent 3430fd68
@@ -17,6 +17,7 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
+import importlib
 import os
 import sys
@@ -348,7 +349,38 @@ def inject_minigalleries(app, what, name, obj, options, lines):
         lines.append("\n")


+# Overwrite the imported classes
+def fix_module_path(module, attribute):
+    attr = importlib.import_module(module)
+    for attr_ in attribute:
+        attr = getattr(attr, attr_)
+    attr.__module__ = module
+
+
+def fix_aliases():
+    patterns = {
+        "torchaudio.models": [
+            ["HuBERTPretrainModel"],
+            ["Wav2Vec2Model"],
+            ["RNNT"],
+            ["Tacotron2"],
+        ],
+        "torchaudio.pipelines": [
+            ["Tacotron2TTSBundle"],
+            ["Tacotron2TTSBundle", "TextProcessor"],
+            ["Tacotron2TTSBundle", "Vocoder"],
+        ],
+    }
+    for module, attributes in patterns.items():
+        for attribute in attributes:
+            fix_module_path(module, attribute)
+
+    if importlib.util.find_spec("torchaudio.flashlight_lib_text_decoder") is not None:
+        fix_module_path("torchaudio.models.decoder", ["CTCHypothesis"])
+
+
 def setup(app):
+    fix_aliases()
     app.connect("autodoc-process-docstring", inject_minigalleries)
...
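Context for this hook: autodoc derives cross-reference targets and rendered signatures from a class's `__module__`, which normally names the private defining module rather than the public alias. A minimal sketch of the effect (assumes torchaudio is importable; the private path shown is an assumption about its internal layout):

import torchaudio.models

cls = torchaudio.models.Wav2Vec2Model
print(cls.__module__)  # e.g. "torchaudio.models.wav2vec2.model" (private defining module)

# What fix_module_path applies at docs-build time:
cls.__module__ = "torchaudio.models"
print(cls.__module__)  # "torchaudio.models" -- Sphinx now documents the public path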
@@ -32,10 +32,10 @@ SourceSeparationBundle
 ~~~~~~~~~~~~~~~~~~~~~~

 .. autoclass:: SourceSeparationBundle
+   :members: sample_rate

    .. automethod:: get_model

-   .. automethod:: sample_rate

 CONVTASNET_BASE_LIBRI2MIX
 ~~~~~~~~~~~~~~~~~~~~~~~~~
...
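The `.rst` change above reflects that `sample_rate` is a property, not a bound method, so `.. automethod::` is the wrong directive for it; listing it under `:members:` lets autodoc document it as an attribute. A minimal sketch of the distinction (the bundle shape here is illustrative, not torchaudio's exact definition):

from dataclasses import dataclass

@dataclass
class Bundle:  # illustrative stand-in for SourceSeparationBundle
    _sample_rate: int

    @property
    def sample_rate(self) -> int:
        """Sample rate of the audio that the model is trained on."""
        return self._sample_rate

    def get_model(self):
        """Construct the model and load the pretrained weight."""

print(callable(Bundle.get_model))                            # True: a method, automethod applies
print(isinstance(Bundle.__dict__["sample_rate"], property))  # True: an attribute, use :members: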
@@ -190,10 +190,7 @@ class CTCDecoder:
     def __call__(
         self, emissions: torch.FloatTensor, lengths: Optional[torch.Tensor] = None
     ) -> List[List[CTCHypothesis]]:
-        # Overriding the signature so that the return type is correct on Sphinx
-        """__call__(self, emissions: torch.FloatTensor, lengths: Optional[torch.Tensor] = None) -> \
-List[List[torchaudio.models.decoder.CTCHypothesis]]
-
+        """
         Args:
             emissions (torch.FloatTensor): CPU tensor of shape `(batch, frame, num_tokens)` storing sequences of
                 probability distribution over labels; output of acoustic model.
...
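The deleted signature block existed only so the rendered return type would read `torchaudio.models.decoder.CTCHypothesis` rather than the private defining module. The `conf.py` hook now handles this, guarded by the same flashlight availability check; a sketch of what it rewrites (the private module name is an assumption):

import importlib.util

if importlib.util.find_spec("torchaudio.flashlight_lib_text_decoder") is not None:
    from torchaudio.models.decoder import CTCHypothesis

    print(CTCHypothesis.__module__)  # private path, e.g. "torchaudio.models.decoder._ctc_decoder"
    CTCHypothesis.__module__ = "torchaudio.models.decoder"  # what fix_module_path applies
    print(CTCHypothesis.__module__)  # "torchaudio.models.decoder"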
@@ -237,10 +237,7 @@ def wav2vec2_model(
     encoder_layer_drop: float,
     aux_num_out: Optional[int],
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """wav2vec2_model(extractor_mode: str, extractor_conv_layer_config: Optional[List[Tuple[int, int, int]]], extractor_conv_bias: bool, encoder_embed_dim: int, encoder_projection_dropout: float, encoder_pos_conv_kernel: int, encoder_pos_conv_groups: int, encoder_num_layers: int, encoder_num_heads: int, encoder_attention_dropout: float, encoder_ff_interm_features: int, encoder_ff_interm_dropout: float, encoder_dropout: float, encoder_layer_norm_first: bool, encoder_layer_drop: float, aux_num_out: Optional[int]) -> torchaudio.models.Wav2Vec2Model
-
-    Build a custom Wav2Vec2Model
+    """Build a custom Wav2Vec2Model

     Note:
         The "feature extractor" below corresponds to
@@ -396,10 +393,7 @@ def wav2vec2_base(
     encoder_layer_drop: float = 0.1,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """wav2vec2_base(encoder_projection_dropout: float = 0.1, encoder_attention_dropout: float = 0.1, encoder_ff_interm_dropout: float = 0.1, encoder_dropout: float = 0.1, encoder_layer_drop: float = 0.1, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build Wav2Vec2Model with "base" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]
+    """Build Wav2Vec2Model with "base" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]

     Args:
         encoder_projection_dropout (float):
@@ -447,10 +441,7 @@ def wav2vec2_large(
     encoder_layer_drop: float = 0.1,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """wav2vec2_large(encoder_projection_dropout: float = 0.1, encoder_attention_dropout: float = 0.1, encoder_ff_interm_dropout: float = 0.1, encoder_dropout: float = 0.1, encoder_layer_drop: float = 0.1, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build Wav2Vec2Model with "large" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]
+    """Build Wav2Vec2Model with "large" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]

     Args:
         encoder_projection_dropout (float):
@@ -498,10 +489,7 @@ def wav2vec2_large_lv60k(
     encoder_layer_drop: float = 0.1,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """wav2vec2_large_lv60k( encoder_projection_dropout: float = 0.1, encoder_attention_dropout: float = 0.0, encoder_ff_interm_dropout: float = 0.1, encoder_dropout: float = 0.0, encoder_layer_drop: float = 0.1, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build Wav2Vec2Model with "large lv-60k" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]
+    """Build Wav2Vec2Model with "large lv-60k" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]

     Args:
         encoder_projection_dropout (float):
@@ -549,10 +537,7 @@ def hubert_base(
     encoder_layer_drop: float = 0.05,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_base(encoder_projection_dropout: float = 0.1, encoder_attention_dropout: float = 0.1, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.1, encoder_layer_drop: float = 0.05, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build HuBERT model with "base" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERT model with "base" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
@@ -600,10 +585,7 @@ def hubert_large(
     encoder_layer_drop: float = 0.0,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_large(encoder_projection_dropout: float = 0.0, encoder_attention_dropout: float = 0.0, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.0, encoder_layer_drop: float = 0.0, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build HuBERT model with "large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERT model with "large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
@@ -651,10 +633,7 @@ def hubert_xlarge(
     encoder_layer_drop: float = 0.0,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_xlarge(encoder_projection_dropout: float = 0.0, encoder_attention_dropout: float = 0.0, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.0, encoder_layer_drop: float = 0.0, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build HuBERT model with "extra large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERT model with "extra large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
@@ -728,10 +707,7 @@ def hubert_pretrain_model(
     final_dim: int,
     feature_grad_mult: Optional[float],
 ) -> HuBERTPretrainModel:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_pretrain_model(extractor_mode: str, extractor_conv_layer_config: Optional[List[Tuple[int, int, int]]], extractor_conv_bias: bool, encoder_embed_dim: int, encoder_projection_dropout: float, encoder_pos_conv_kernel: int, encoder_pos_conv_groups: int, encoder_num_layers: int, encoder_num_heads: int, encoder_attention_dropout: float, encoder_ff_interm_features: int, encoder_ff_interm_dropout: float, encoder_dropout: float, encoder_layer_norm_first: bool, encoder_layer_drop: float, mask_prob: float, mask_selection: str, mask_other: float, mask_length: int, no_mask_overlap: bool, mask_min_space: int, mask_channel_prob: float, mask_channel_selection: str, mask_channel_other: float, mask_channel_length: int, no_mask_channel_overlap: bool, mask_channel_min_space: int, skip_masked: bool, skip_nomask: bool, num_classes: int, final_dim: int) -> torchaudio.models.HuBERTPretrainModel
-
-    Build a custom HuBERTPretrainModel for training from scratch
+    """Build a custom HuBERTPretrainModel for training from scratch

     Note:
         The "feature extractor" below corresponds to
@@ -999,10 +975,7 @@ def hubert_pretrain_base(
     feature_grad_mult: Optional[float] = 0.1,
     num_classes: int = 100,
 ) -> HuBERTPretrainModel:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_pretrain_base(encoder_projection_dropout: float = 0.1, encoder_attention_dropout: float = 0.1, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.1, encoder_layer_drop: float = 0.05, mask_prob: float = 0.8, mask_channel_prob: float = 0.0, mask_channel_length: int = 10, feature_grad_mult: Optional[float] = 0.1, num_classes: int = 100) -> torchaudio.models.HuBERTPretrainModel
-
-    Build HuBERTPretrainModel model with "base" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERTPretrainModel model with "base" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
@@ -1077,10 +1050,7 @@ def hubert_pretrain_large(
     mask_channel_length: int = 10,
     feature_grad_mult: Optional[float] = None,
 ) -> HuBERTPretrainModel:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_pretrain_large(encoder_projection_dropout: float = 0.0, encoder_attention_dropout: float = 0.0, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.0, encoder_layer_drop: float = 0.0, mask_prob: float = 0.8, mask_channel_prob: float = 0.0, mask_channel_length: int = 10, feature_grad_mult: Optional[float] = None) -> torchaudio.models.HuBERTPretrainModel
-
-    Build HuBERTPretrainModel model for pre-training with "large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERTPretrainModel model for pre-training with "large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
@@ -1153,10 +1123,7 @@ def hubert_pretrain_xlarge(
     mask_channel_length: int = 10,
     feature_grad_mult: Optional[float] = None,
 ) -> HuBERTPretrainModel:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_pretrain_xlarge(encoder_projection_dropout: float = 0.0, encoder_attention_dropout: float = 0.0, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.0, encoder_layer_drop: float = 0.0, mask_prob: float = 0.8, mask_channel_prob: float = 0.0, mask_channel_length: int = 10, feature_grad_mult: Optional[float] = None) -> torchaudio.models.HuBERTPretrainModel
-
-    Build HuBERTPretrainModel model for pre-training with "extra large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERTPretrainModel model for pre-training with "extra large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
...
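Every factory-function hunk above is the same mechanical change: drop the hand-maintained signature copy from the docstring and let autodoc read the real annotations. A quick sketch confirming the genuine signature remains introspectable (assumes torchaudio is installed):

import inspect
import torchaudio

# Sphinx can render this directly; no hand-written copy is needed in the docstring.
print(inspect.signature(torchaudio.models.wav2vec2_base))

model = torchaudio.models.wav2vec2_base(aux_num_out=32)
print(type(model).__name__)  # Wav2Vec2Model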
@@ -125,10 +125,7 @@ def _convert_state_dict(state_dict):

 def import_fairseq_model(original: Module) -> Wav2Vec2Model:
-    # Overriding the signature so that the types are correct on Sphinx
-    """import_fairseq_model(original: torch.nn.Module) -> torchaudio.models.Wav2Vec2Model
-
-    Build Wav2Vec2Model from the corresponding model object of `fairseq`_.
+    """Build Wav2Vec2Model from the corresponding model object of `fairseq`_.

     Args:
         original (torch.nn.Module):
...
@@ -48,9 +48,7 @@ def _build(config, original):

 def import_huggingface_model(original: Module) -> Wav2Vec2Model:
-    """import_huggingface_model(original: torch.nn.Module) -> torchaudio.models.Wav2Vec2Model
-
-    Build Wav2Vec2Model from the corresponding model object of Hugging Face's `Transformers`_.
+    """Build Wav2Vec2Model from the corresponding model object of Hugging Face's `Transformers`_.

     Args:
         original (torch.nn.Module): An instance of ``Wav2Vec2ForCTC`` from ``transformers``.
...
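The import helpers themselves are unchanged; only the docstring headers were trimmed. Typical usage, for reference (requires `transformers` to be installed; the checkpoint name is illustrative):

from torchaudio.models.wav2vec2.utils import import_huggingface_model
from transformers import Wav2Vec2ForCTC

original = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
model = import_huggingface_model(original)  # torchaudio.models.Wav2Vec2Model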
@@ -155,18 +155,19 @@ class Tacotron2TTSBundle(ABC):

         See :func:`torchaudio.pipelines.Tacotron2TTSBundle.get_text_processor` for the usage.
         """

+        pass
+
     class Vocoder(_Vocoder):
         """Interface of the vocoder part of Tacotron2TTS pipeline

         See :func:`torchaudio.pipelines.Tacotron2TTSBundle.get_vocoder` for the usage.
         """

+        pass
+
     @abstractmethod
     def get_text_processor(self, *, dl_kwargs=None) -> TextProcessor:
-        # Overriding the signature so that the return type is correct on Sphinx
-        """get_text_processor(self, *, dl_kwargs=None) -> torchaudio.pipelines.Tacotron2TTSBundle.TextProcessor
-
-        Create a text processor
+        """Create a text processor

         For character-based pipeline, this processor splits the input text by character.
         For phoneme-based pipeline, this processor converts the input text (grapheme) to
@@ -235,10 +236,7 @@
     @abstractmethod
     def get_vocoder(self, *, dl_kwargs=None) -> Vocoder:
-        # Overriding the signature so that the return type is correct on Sphinx
-        """get_vocoder(self, *, dl_kwargs=None) -> torchaudio.pipelines.Tacotron2TTSBundle.Vocoder
-
-        Create a vocoder module, based off of either WaveRNN or GriffinLim.
+        """Create a vocoder module, based off of either WaveRNN or GriffinLim.

         If a pre-trained weight file is necessary,
         :func:`torch.hub.load_state_dict_from_url` is used to downloaded it.
@@ -256,10 +254,7 @@ class Tacotron2TTSBundle(ABC):
     @abstractmethod
     def get_tacotron2(self, *, dl_kwargs=None) -> Tacotron2:
-        # Overriding the signature so that the return type is correct on Sphinx
-        """get_tacotron2(self, *, dl_kwargs=None) -> torchaudio.models.Tacotron2
-
-        Create a Tacotron2 model with pre-trained weight.
+        """Create a Tacotron2 model with pre-trained weight.

         Args:
             dl_kwargs (dictionary of keyword arguments):
...
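The three getters above are exactly what the `conf.py` rewrite of `Tacotron2TTSBundle`, `TextProcessor`, and `Vocoder` targets. Typical use of a concrete bundle, for reference (assumes torchaudio is installed; pretrained weights are downloaded on first call):

import torchaudio

bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_PHONE_LJSPEECH
processor = bundle.get_text_processor()  # Tacotron2TTSBundle.TextProcessor
tacotron2 = bundle.get_tacotron2()       # torchaudio.models.Tacotron2
vocoder = bundle.get_vocoder()           # Tacotron2TTSBundle.Vocoder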
@@ -63,10 +63,7 @@ class Wav2Vec2Bundle:
         return state_dict

     def get_model(self, *, dl_kwargs=None) -> Wav2Vec2Model:
-        # Overriding the signature so that the return type is correct on Sphinx
-        """get_model(self, *, dl_kwargs=None) -> torchaudio.models.Wav2Vec2Model
-
-        Construct the model and load the pretrained weight.
+        """Construct the model and load the pretrained weight.

         The weight file is downloaded from the internet and cached with
         :func:`torch.hub.load_state_dict_from_url`
...
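And the corresponding usage of `Wav2Vec2Bundle.get_model`, where the weight file is fetched and cached via `torch.hub.load_state_dict_from_url`:

import torchaudio

bundle = torchaudio.pipelines.WAV2VEC2_ASR_BASE_960H
model = bundle.get_model()  # torchaudio.models.Wav2Vec2Model
print(bundle.sample_rate)   # 16000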