Commit c89ab0c6 authored by moto's avatar moto Committed by Facebook GitHub Bot
Browse files

Adopt `:autosummary:` in `torchaudio.models.decoder` module doc (#2684)

Summary:
* Adopts `:autosummary:` in decoder module doc
* Hide the constructor signature of `CTCDecoder`, as the `ctc_decoder` function is the one client code is supposed to use.
* Introduce a `children` property on `CTCDecoderLMState`; otherwise it does not show up in the doc.

https://output.circle-artifacts.com/output/job/7aac5eb9-7d2d-4f63-bcdf-83a6f40b4e5a/artifacts/0/docs/models.decoder.html

<img width="748" alt="Screen Shot 2022-09-16 at 5 23 22 PM" src="https://user-images.githubusercontent.com/855818/190592409-0c2ec8a4-d2cf-4d76-a965-8a570faaeb1a.png">

https://output.circle-artifacts.com/output/job/7aac5eb9-7d2d-4f63-bcdf-83a6f40b4e5a/artifacts/0/docs/generated/torchaudio.models.decoder.CTCDecoder.html#torchaudio.models.decoder.CTCDecoder

<img width="723" alt="Screen Shot 2022-09-16 at 5 23 53 PM" src="https://user-images.githubusercontent.com/855818/190592501-3fad1e07-ae3e-44f5-93be-f33181025390.png">

Pull Request resolved: https://github.com/pytorch/audio/pull/2684

Reviewed By: carolineechen

Differential Revision: D39574272

Pulled By: mthrok

fbshipit-source-id: d977660bd46f5cf98c535adbf2735be896b28773
parent f50a9286
..
autogenerated from source/_templates/autosummary/ctc_decoder_class.rst
{#
################################################################################
# autosummary template for CTCDecoder
# Since the class has multiple methods and support structures,
# we want to have them show up in the table of contents.
# The default class template does not do this, so we use a custom one here.
################################################################################
#}
{{ name | underline }}
{%- if name != "CTCDecoder" %}
.. autofunction:: {{fullname}}
{%- else %}
.. autoclass:: {{ fullname }}()
Methods
=======
{%- for item in methods %}
{{ item | underline("-") }}
.. container:: py attribute
.. automethod:: {{[fullname, item] | join('.')}}
{%- endfor %}
Support Structures
==================
{%- for item in ["CTCDecoderLM", "CTCDecoderLMState", "CTCHypothesis"] %}
{{ item | underline("-") }}
.. autoclass:: torchaudio.models.decoder.{{item}}
:members:
{%- endfor %}
{%- endif %}
.. role:: hidden
:class: hidden-section
.. py:module:: torchaudio.models.decoder
torchaudio.models.decoder
=========================
.. currentmodule:: torchaudio.models.decoder
.. py:module:: torchaudio.models.decoder
Decoder Class
-------------
CTCDecoder
~~~~~~~~~~
.. autoclass:: CTCDecoder
.. automethod:: __call__
.. automethod:: idxs_to_tokens
CTCDecoderLM
~~~~~~~~~~~~
.. autoclass:: CTCDecoderLM
.. automethod:: start
.. automethod:: score
.. automethod:: finish
CTCDecoderLMState
~~~~~~~~~~~~~~~~~
.. autoclass:: CTCDecoderLMState
:members: children
.. automethod:: child
.. automethod:: compare
CTCHypothesis
~~~~~~~~~~~~~
.. autoclass:: CTCHypothesis
Factory Function
----------------
ctc_decoder
~~~~~~~~~~~
CTC Decoder
-----------
.. autoclass:: ctc_decoder
.. autosummary::
:toctree: generated
:nosignatures:
:template: autosummary/ctc_decoder_class.rst
Utility Function
----------------
CTCDecoder
ctc_decoder
download_pretrained_files
download_pretrained_files
~~~~~~~~~~~~~~~~~~~~~~~~~
.. rubric:: Tutorials using CTC Decoder
.. autoclass:: download_pretrained_files
.. minigallery:: torchaudio.models.decoder.CTCDecoder
......@@ -3,7 +3,7 @@ from __future__ import annotations
import itertools as it
from abc import abstractmethod
from collections import namedtuple
from typing import Dict, List, NamedTuple, Optional, Union
from typing import Dict, List, NamedTuple, Optional, Tuple, Union
import torch
import torchaudio
......@@ -96,33 +96,35 @@ def _get_word_dict(lexicon, lm, lm_dict, tokens_dict, unk_word):
class CTCHypothesis(NamedTuple):
r"""Represents hypothesis generated by CTC beam search decoder :py:func:`CTCDecoder`.
r"""Represents hypothesis generated by CTC beam search decoder :class:`CTCDecoder`."""
tokens: torch.LongTensor
"""Predicted sequence of token IDs. Shape `(L, )`, where `L` is the length of the output sequence"""
words: List[str]
"""List of predicted words.
Note:
The ``words`` field is only applicable if a lexicon is provided to the decoder. If
decoding without a lexicon, it will be blank. Please refer to ``tokens`` and
:py:func:`idxs_to_tokens <torchaudio.models.decoder.CTCDecoder.idxs_to_tokens>` instead.
:ivar torch.LongTensor tokens: Predicted sequence of token IDs. Shape `(L, )`, where
`L` is the length of the output sequence
:ivar List[str] words: List of predicted words
:ivar float score: Score corresponding to hypothesis
:ivar torch.IntTensor timesteps: Timesteps corresponding to the tokens. Shape `(L, )`,
where `L` is the length of the output sequence
This attribute is only applicable if a lexicon is provided to the decoder. If
decoding without a lexicon, it will be blank. Please refer to :attr:`tokens` and
:func:`~torchaudio.models.decoder.CTCDecoder.idxs_to_tokens` instead.
"""
tokens: torch.LongTensor
words: List[str]
score: float
"""Score corresponding to hypothesis"""
timesteps: torch.IntTensor
"""Timesteps corresponding to the tokens. Shape `(L, )`, where `L` is the length of the output sequence"""
class CTCDecoderLMState(_LMState):
"""Language model state.
"""Language model state."""
:ivar Dict[int] children: Map of indices to LM states
"""
@property
def children(self) -> Dict[int, CTCDecoderLMState]:
"""Map of indices to LM states"""
return super().children
def child(self, usr_index: int):
def child(self, usr_index: int) -> CTCDecoderLMState:
"""Returns child corresponding to usr_index, or creates and returns a new state if input index
is not found.
......@@ -134,7 +136,7 @@ class CTCDecoderLMState(_LMState):
"""
return super().child(usr_index)
def compare(self, state: CTCDecoderLMState):
def compare(self, state: CTCDecoderLMState) -> CTCDecoderLMState:
"""Compare two language model states.
Args:
......@@ -150,7 +152,7 @@ class CTCDecoderLM(_LM):
"""Language model base class for creating custom language models to use with the decoder."""
@abstractmethod
def start(self, start_with_nothing: bool):
def start(self, start_with_nothing: bool) -> CTCDecoderLMState:
"""Initialize or reset the language model.
Args:
......@@ -162,7 +164,7 @@ class CTCDecoderLM(_LM):
raise NotImplementedError
@abstractmethod
def score(self, state: CTCDecoderLMState, usr_token_idx: int):
def score(self, state: CTCDecoderLMState, usr_token_idx: int) -> Tuple[CTCDecoderLMState, float]:
"""Evaluate the language model based on the current LM state and new word.
Args:
......@@ -170,14 +172,14 @@ class CTCDecoderLM(_LM):
usr_token_idx (int): index of the word
Returns:
Tuple[CTCDecoderLMState, float]
(CTCDecoderLMState, float)
CTCDecoderLMState: new LM state
float: score
"""
raise NotImplementedError
@abstractmethod
def finish(self, state: CTCDecoderLMState):
def finish(self, state: CTCDecoderLMState) -> Tuple[CTCDecoderLMState, float]:
"""Evaluate end for language model based on current LM state.
Args:
......@@ -194,24 +196,12 @@ class CTCDecoderLM(_LM):
class CTCDecoder:
"""
.. devices:: CPU
"""CTC beam search decoder from *Flashlight* :cite:`kahn2022flashlight`.
CTC beam search decoder from *Flashlight* :cite:`kahn2022flashlight`.
.. devices:: CPU
Note:
To build the decoder, please use the factory function :py:func:`ctc_decoder`.
Args:
nbest (int): number of best decodings to return
lexicon (Dict or None): lexicon mapping of words to spellings, or None for lexicon-free decoder
word_dict (_Dictionary): dictionary of words
tokens_dict (_Dictionary): dictionary of tokens
lm (CTCDecoderLM): language model. If using a lexicon, only word level LMs are currently supported
decoder_options (_LexiconDecoderOptions or _LexiconFreeDecoderOptions): parameters used for beam search decoding
blank_token (str): token corresponding to blank
sil_token (str): token corresponding to silence
unk_word (str): word corresponding to unknown
To build the decoder, please use the factory function :func:`ctc_decoder`.
"""
def __init__(
......@@ -226,6 +216,20 @@ class CTCDecoder:
sil_token: str,
unk_word: str,
) -> None:
"""
Args:
nbest (int): number of best decodings to return
lexicon (Dict or None): lexicon mapping of words to spellings, or None for lexicon-free decoder
word_dict (_Dictionary): dictionary of words
tokens_dict (_Dictionary): dictionary of tokens
lm (CTCDecoderLM): language model. If using a lexicon, only word level LMs are currently supported
decoder_options (_LexiconDecoderOptions or _LexiconFreeDecoderOptions):
parameters used for beam search decoding
blank_token (str): token corresponding to blank
sil_token (str): token corresponding to silence
unk_word (str): word corresponding to unknown
"""
self.nbest = nbest
self.word_dict = word_dict
self.tokens_dict = tokens_dict
......@@ -348,8 +352,7 @@ def ctc_decoder(
sil_token: str = "|",
unk_word: str = "<unk>",
) -> CTCDecoder:
"""
Builds CTC beam search decoder from *Flashlight* :cite:`kahn2022flashlight`.
"""Builds an instance of :class:`CTCDecoder`.
Args:
lexicon (str or None): lexicon file containing the possible words and corresponding spellings.
......@@ -455,20 +458,19 @@ def _get_filenames(model: str) -> _PretrainedFiles:
def download_pretrained_files(model: str) -> _PretrainedFiles:
"""
Retrieves pretrained data files used for CTC decoder.
Retrieves pretrained data files used for :func:`ctc_decoder`.
Args:
model (str): pretrained language model to download.
Options: ["librispeech-3-gram", "librispeech-4-gram", "librispeech"]
Valid values are: ``"librispeech-3-gram"``, ``"librispeech-4-gram"`` and ``"librispeech"``.
Returns:
Object with the following attributes
lm:
path corresponding to downloaded language model, or `None` if the model is not associated with an lm
lexicon:
path corresponding to downloaded lexicon file
tokens:
path corresponding to downloaded tokens file
* ``lm``: path corresponding to downloaded language model,
or ``None`` if the model is not associated with an lm
* ``lexicon``: path corresponding to downloaded lexicon file
* ``tokens``: path corresponding to downloaded tokens file
"""
files = _get_filenames(model)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment