Commit 9912e54d authored by moto, committed by Facebook GitHub Bot

Update model documentation structure (#2902)

Summary:
Currently, the documentation page for `torchaudio.models` has separate sections for model definitions and factory functions, so the relationship between a model and its factory functions is not immediately clear.

This commit moves each model's factory functions onto the documentation page of the corresponding model class.

After:
 - https://output.circle-artifacts.com/output/job/242a9521-7460-4043-895b-9995bf5093b5/artifacts/0/docs/generated/torchaudio.models.Wav2Vec2Model.html

<img width="1171" alt="Screen Shot 2022-12-08 at 8 41 03 PM" src="https://user-images.githubusercontent.com/855818/206603743-74a6e368-c3cf-4b87-b854-518a95893f06.png">

Pull Request resolved: https://github.com/pytorch/audio/pull/2902

Reviewed By: carolineechen

Differential Revision: D41897800

Pulled By: mthrok

fbshipit-source-id: a3c01d28d80e755596a9bc37c951960eb84870b9
..
   autogenerated from source/_templates/autosummary/model_class.rst
.. currentmodule:: torchaudio.models
..
{%- set methods=["forward"] %}
{%- set helpers={
"torchaudio.models.RNNTBeamSearch": [
"Hypothesis",
],
}
-%}
{%- set factory={
"torchaudio.models.ConvTasNet": [
"conv_tasnet_base",
],
"torchaudio.models.Wav2Vec2Model": [
"wav2vec2_model",
"wav2vec2_base",
"wav2vec2_large",
"wav2vec2_large_lv60k",
"hubert_base",
"hubert_large",
"hubert_xlarge",
"wavlm_model",
"wavlm_base",
"wavlm_large",
],
"torchaudio.models.HuBERTPretrainModel": [
"hubert_pretrain_model",
"hubert_pretrain_base",
"hubert_pretrain_large",
"hubert_pretrain_xlarge",
],
"torchaudio.models.RNNT": [
"emformer_rnnt_model",
"emformer_rnnt_base",
],
"torchaudio.models.HDemucs": [
"hdemucs_low",
"hdemucs_medium",
"hdemucs_high",
],
}
-%}
{%- set utils={
"torchaudio.models.Wav2Vec2Model": [
"~torchaudio.models.wav2vec2.utils.import_fairseq_model",
"~torchaudio.models.wav2vec2.utils.import_huggingface_model",
]
}
-%}
{%- if name in ["Wav2Vec2Model"] %}
{{ methods.extend(["extract_features"]) }}
{%- elif name in ["Emformer", "RNNTBeamSearch", "WaveRNN", "Tacotron2", ] %}
@@ -10,10 +61,17 @@
{{ methods.extend(["transcribe_streaming", "transcribe", "predict", "join"]) }}
{%- endif %}
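The branching above can be sketched in plain Python. This is a hypothetical re-implementation of the template's method-list logic for illustration only — not torchaudio code — and the branches elided in the diff are summarized in a comment:

```python
# Sketch of the template's per-class method-list logic (illustrative only).
def methods_for(name):
    """Return the method names the docs page lists for a model class."""
    methods = ["forward"]
    if name in ["Wav2Vec2Model"]:
        methods.extend(["extract_features"])
    # Further branches (partially elided in the diff) extend other classes,
    # e.g. with "transcribe_streaming", "transcribe", "predict", "join".
    return methods

print(methods_for("Wav2Vec2Model"))  # ['forward', 'extract_features']
```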
.. TITLE
{{ name | underline }}
.. CLASS DEFINITIONS
.. autoclass:: {{ fullname }}
Methods
=======
{% for item in methods %}
{{item | underline("-") }}
@@ -24,17 +82,56 @@
{%- endfor %}
.. HELPER STRUCTURES
{%- if helpers[fullname] %}
Support Structures
==================
{%- for item in helpers[fullname] %}
{{item | underline("-") }}
.. container:: py attribute

   .. autodata:: {{["torchaudio.models", item] | join('.')}}
      :no-value:
{%- endfor %}
{%- endif %}
.. FACTORY FUNCTIONS
{%- if factory[fullname] %}
Factory Functions
=================
.. autosummary::
   :toctree: ../generated
   :nosignatures:
{% for item in factory[fullname] %}
   {{["~torchaudio.models", item] | join('.')}}
{%- endfor %}
{%- endif %}
.. UTILITY FUNCTIONS
{%- if utils[fullname] %}
Utility Functions
=================
.. autosummary::
   :toctree: ../generated
   :nosignatures:
{% for item in utils[fullname] %}
   {{ item }}
{%- endfor %}
{%- endif %}
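The template's rendering is driven entirely by dictionary lookups keyed on the class's fully qualified name. A minimal Python sketch of that lookup and of the `join('.')` filter, using a hypothetical subset of the `factory` mapping above:

```python
# Subset of the template's `factory` mapping, keyed by fully qualified class name.
factory = {
    "torchaudio.models.ConvTasNet": ["conv_tasnet_base"],
    "torchaudio.models.RNNT": ["emformer_rnnt_model", "emformer_rnnt_base"],
}

def factory_targets(fullname):
    """Build autosummary targets the way the template's join('.') filter does."""
    return [".".join(["~torchaudio.models", item])
            for item in factory.get(fullname, [])]

print(factory_targets("torchaudio.models.RNNT"))
# ['~torchaudio.models.emformer_rnnt_model', '~torchaudio.models.emformer_rnnt_base']
```

Classes absent from the mapping simply render no Factory Functions section, which is why the template guards each section with `{%- if factory[fullname] %}`.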
@@ -7,15 +7,13 @@ torchaudio.models
The ``torchaudio.models`` subpackage contains definitions of models for addressing common audio tasks.
.. note::
   For models with pre-trained parameters, please refer to :mod:`torchaudio.pipelines` module.
Model Definitions
-----------------
Model definitions are responsible for constructing computation graphs and executing them.
Some models have complex structure and variations.
For such models, factory functions are provided.
.. autosummary::
   :toctree: generated
@@ -34,41 +32,3 @@ For such models, `Factory Functions`_ are provided.
   Wav2Letter
   Wav2Vec2Model
   WaveRNN
Factory Functions
-----------------
.. autosummary::
   :toctree: generated
   :nosignatures:

   conv_tasnet_base
   emformer_rnnt_model
   emformer_rnnt_base
   wav2vec2_model
   wav2vec2_base
   wav2vec2_large
   wav2vec2_large_lv60k
   hubert_base
   hubert_large
   hubert_xlarge
   hubert_pretrain_model
   hubert_pretrain_base
   hubert_pretrain_large
   hubert_pretrain_xlarge
   hdemucs_low
   hdemucs_medium
   hdemucs_high
   wavlm_model
   wavlm_base
   wavlm_large
Utility Functions
-----------------
.. autosummary::
   :toctree: generated
   :nosignatures:

   ~wav2vec2.utils.import_fairseq_model
   ~wav2vec2.utils.import_huggingface_model
@@ -303,9 +303,6 @@ class HDemucs(torch.nn.Module):
*Hybrid Spectrogram and Waveform Source Separation* :cite:`defossez2021hybrid`.
See Also:
* :func:`~torchaudio.models.hdemucs_low`,
:func:`~torchaudio.models.hdemucs_medium`,
:func:`~torchaudio.models.hdemucs_high`: factory functions.
* :class:`torchaudio.pipelines.SourceSeparationBundle`: Source separation pipeline with pre-trained models.
Args:
...
@@ -168,7 +168,6 @@ class ConvTasNet(torch.nn.Module):
This implementation corresponds to the "non-causal" setting in the paper.
See Also:
* :func:`~torchaudio.models.conv_tasnet_base`: A factory function.
* :class:`torchaudio.pipelines.SourceSeparationBundle`: Source separation pipeline with pre-trained models.
Args:
...
@@ -456,8 +456,7 @@ class RNNT(torch.nn.Module):
Recurrent neural network transducer (RNN-T) model.
Note:
To build the model, please use one of the factory functions.
See Also:
:class:`torchaudio.pipelines.RNNTBundle`: ASR pipeline with pre-trained models.
...
@@ -12,10 +12,7 @@ class Wav2Vec2Model(Module):
"""Acoustic model used in *wav2vec 2.0* :cite:`baevski2020wav2vec`.
Note:
To build the model, please use one of the factory functions.
See Also:
* :class:`torchaudio.pipelines.Wav2Vec2Bundle`: Pretrained models (without fine-tuning)
@@ -129,13 +126,11 @@ class HuBERTPretrainModel(Module):
HuBERT model used for pretraining in *HuBERT* :cite:`hsu2021hubert`.
Note:
To build the model, please use one of the factory functions.
See Also:
`HuBERT Pre-training and Fine-tuning Recipes
<https://github.com/pytorch/audio/tree/main/examples/hubert>`__
Args:
wav2vec2 (Wav2Vec2Model):
@@ -1241,35 +1236,35 @@ def wavlm_model(
"""Builds custom WavLM model :cite:`chen2022wavlm`. The architecture is compatible
with Wav2Vec2 model :cite:`baevski2020wav2vec`, and so the output object is
:class:`~torchaudio.models.Wav2Vec2Model`. Most of the arguments have the same meaning
as in :py:func:`~torchaudio.models.wav2vec2_model` so please refer there for documentation.

Args:
    extractor_mode (str): Operation mode of feature extractor.
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    extractor_conv_layer_config (list of integer tuples or None):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    extractor_conv_bias (bool):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_embed_dim (int):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_projection_dropout (float):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_pos_conv_kernel (int):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_pos_conv_groups (int):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_num_layers (int):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_num_heads (int):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_num_buckets (int):
        Number of buckets for relative position embedding.
@@ -1277,25 +1272,25 @@ def wavlm_model(
        Maximum distance for relative position embedding.
    encoder_attention_dropout (float):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_ff_interm_features (int):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_ff_interm_dropout (float):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_dropout (float):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_layer_norm_first (bool):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_layer_drop (float):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    aux_num_out (int or None):
        See :py:func:`~torchaudio.models.wav2vec2_model`.

Returns:
    Wav2Vec2Model:
@@ -1344,17 +1339,17 @@ def wavlm_base(
Args:
    encoder_projection_dropout (float):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_attention_dropout (float):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_ff_interm_dropout (float):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_dropout (float):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_layer_drop (float):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    aux_num_out (int, optional):
        See :py:func:`~torchaudio.models.wav2vec2_model`.

Returns:
    Wav2Vec2Model:
@@ -1396,17 +1391,17 @@ def wavlm_large(
Args:
    encoder_projection_dropout (float):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_attention_dropout (float):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_ff_interm_dropout (float):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_dropout (float):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    encoder_layer_drop (float):
        See :py:func:`~torchaudio.models.wav2vec2_model`.
    aux_num_out (int, optional):
        See :py:func:`~torchaudio.models.wav2vec2_model`.

Returns:
    Wav2Vec2Model:
...