Commit 8a0d7b36 authored by moto, committed by Facebook GitHub Bot

Tweak documentation (#2656)

Summary:
1. Override the `__module__` attribute of re-exported classes in `conf.py`, so that no manual signature override is necessary in each docstring
2. Fix the documentation of the `SourceSeparationBundle` member attribute

Pull Request resolved: https://github.com/pytorch/audio/pull/2656

Reviewed By: carolineechen

Differential Revision: D39293053

Pulled By: mthrok

fbshipit-source-id: 2b8d6be1aee517d0e692043c26ac2438a787adc6
parent 3430fd68
@@ -17,6 +17,7 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
+import importlib
 import os
 import sys
@@ -348,7 +349,38 @@ def inject_minigalleries(app, what, name, obj, options, lines):
         lines.append("\n")


+# Overwrite the imported classes
+def fix_module_path(module, attribute):
+    attr = importlib.import_module(module)
+    for attr_ in attribute:
+        attr = getattr(attr, attr_)
+    attr.__module__ = module
+
+
+def fix_aliases():
+    patterns = {
+        "torchaudio.models": [
+            ["HuBERTPretrainModel"],
+            ["Wav2Vec2Model"],
+            ["RNNT"],
+            ["Tacotron2"],
+        ],
+        "torchaudio.pipelines": [
+            ["Tacotron2TTSBundle"],
+            ["Tacotron2TTSBundle", "TextProcessor"],
+            ["Tacotron2TTSBundle", "Vocoder"],
+        ],
+    }
+    for module, attributes in patterns.items():
+        for attribute in attributes:
+            fix_module_path(module, attribute)
+
+    if importlib.util.find_spec("torchaudio.flashlight_lib_text_decoder") is not None:
+        fix_module_path("torchaudio.models.decoder", ["CTCHypothesis"])
+
+
 def setup(app):
+    fix_aliases()
     app.connect("autodoc-process-docstring", inject_minigalleries)
......
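The hook above is the heart of change 1: at docs-build time it points each public alias's `__module__` at the public package, so autodoc renders `torchaudio.models.Wav2Vec2Model` rather than the private defining module. A minimal sketch of the effect, assuming a standard torchaudio install of this era (the private path shown is illustrative):

```python
# Minimal sketch of what fix_module_path accomplishes.
# Note the explicit `import importlib.util`: a bare `import importlib` does
# not reliably expose the `util` submodule in a fresh interpreter.
import importlib
import importlib.util

if importlib.util.find_spec("torchaudio") is None:
    raise SystemExit("torchaudio not installed")

import torchaudio.models

# Before the override, the class reports its defining (private) module.
print(torchaudio.models.Wav2Vec2Model.__module__)
# e.g. "torchaudio.models.wav2vec2.model"

# fix_module_path("torchaudio.models", ["Wav2Vec2Model"]) boils down to:
attr = importlib.import_module("torchaudio.models")
attr = getattr(attr, "Wav2Vec2Model")
attr.__module__ = "torchaudio.models"

print(torchaudio.models.Wav2Vec2Model.__module__)
# "torchaudio.models" -- the path Sphinx now documents
```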
@@ -32,10 +32,10 @@ SourceSeparationBundle
 ~~~~~~~~~~~~~~~~~~~~~~

 .. autoclass:: SourceSeparationBundle
-   :members: sample_rate

    .. automethod:: get_model

+   .. automethod:: sample_rate

 CONVTASNET_BASE_LIBRI2MIX
 ~~~~~~~~~~~~~~~~~~~~~~~~~
......
@@ -190,10 +190,7 @@ class CTCDecoder:
     def __call__(
         self, emissions: torch.FloatTensor, lengths: Optional[torch.Tensor] = None
     ) -> List[List[CTCHypothesis]]:
-        # Overriding the signature so that the return type is correct on Sphinx
-        """__call__(self, emissions: torch.FloatTensor, lengths: Optional[torch.Tensor] = None) -> \
-                List[List[torchaudio.models.decoder.CTCHypothesis]]
-
+        """
         Args:
             emissions (torch.FloatTensor): CPU tensor of shape `(batch, frame, num_tokens)` storing sequences of
                 probability distribution over labels; output of acoustic model.
......
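The lines removed above show the pattern this commit retires across the files below: Sphinx's `autodoc_docstring_signature` option (enabled by default) treats a first docstring line shaped like a signature as the signature to document, which let each function spell out a fully qualified return type by hand. A sketch of the old trick with a hypothetical function (`decode` and its types are illustrative only):

```python
from typing import List


def decode(emissions) -> List[list]:
    # The first docstring line mimics a signature; with Sphinx's
    # autodoc_docstring_signature (on by default) it replaces the real one,
    # so the rendered docs show the fully qualified return type.
    """decode(emissions: torch.FloatTensor) -> List[List[torchaudio.models.decoder.CTCHypothesis]]

    Decode emissions into ranked hypotheses.
    """
```

With `fix_aliases` pointing `__module__` at the public packages, autodoc resolves the real annotations to the public paths on its own, so the hand-written signatures can go.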
@@ -237,10 +237,7 @@ def wav2vec2_model(
     encoder_layer_drop: float,
     aux_num_out: Optional[int],
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """wav2vec2_model(extractor_mode: str, extractor_conv_layer_config: Optional[List[Tuple[int, int, int]]], extractor_conv_bias: bool, encoder_embed_dim: int, encoder_projection_dropout: float, encoder_pos_conv_kernel: int, encoder_pos_conv_groups: int, encoder_num_layers: int, encoder_num_heads: int, encoder_attention_dropout: float, encoder_ff_interm_features: int, encoder_ff_interm_dropout: float, encoder_dropout: float, encoder_layer_norm_first: bool, encoder_layer_drop: float, aux_num_out: Optional[int]) -> torchaudio.models.Wav2Vec2Model
-
-    Build a custom Wav2Vec2Model
+    """Build a custom Wav2Vec2Model

     Note:
         The "feature extractor" below corresponds to
@@ -396,10 +393,7 @@ def wav2vec2_base(
     encoder_layer_drop: float = 0.1,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """wav2vec2_base(encoder_projection_dropout: float = 0.1, encoder_attention_dropout: float = 0.1, encoder_ff_interm_dropout: float = 0.1, encoder_dropout: float = 0.1, encoder_layer_drop: float = 0.1, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build Wav2Vec2Model with "base" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]
+    """Build Wav2Vec2Model with "base" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]

     Args:
         encoder_projection_dropout (float):
@@ -447,10 +441,7 @@ def wav2vec2_large(
     encoder_layer_drop: float = 0.1,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """wav2vec2_large(encoder_projection_dropout: float = 0.1, encoder_attention_dropout: float = 0.1, encoder_ff_interm_dropout: float = 0.1, encoder_dropout: float = 0.1, encoder_layer_drop: float = 0.1, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build Wav2Vec2Model with "large" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]
+    """Build Wav2Vec2Model with "large" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]

     Args:
         encoder_projection_dropout (float):
@@ -498,10 +489,7 @@ def wav2vec2_large_lv60k(
     encoder_layer_drop: float = 0.1,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """wav2vec2_large_lv60k( encoder_projection_dropout: float = 0.1, encoder_attention_dropout: float = 0.0, encoder_ff_interm_dropout: float = 0.1, encoder_dropout: float = 0.0, encoder_layer_drop: float = 0.1, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build Wav2Vec2Model with "large lv-60k" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]
+    """Build Wav2Vec2Model with "large lv-60k" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]

     Args:
         encoder_projection_dropout (float):
@@ -549,10 +537,7 @@ def hubert_base(
     encoder_layer_drop: float = 0.05,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_base(encoder_projection_dropout: float = 0.1, encoder_attention_dropout: float = 0.1, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.1, encoder_layer_drop: float = 0.05, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build HuBERT model with "base" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERT model with "base" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
@@ -600,10 +585,7 @@ def hubert_large(
     encoder_layer_drop: float = 0.0,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_large(encoder_projection_dropout: float = 0.0, encoder_attention_dropout: float = 0.0, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.0, encoder_layer_drop: float = 0.0, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build HuBERT model with "large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERT model with "large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
@@ -651,10 +633,7 @@ def hubert_xlarge(
     encoder_layer_drop: float = 0.0,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_xlarge(encoder_projection_dropout: float = 0.0, encoder_attention_dropout: float = 0.0, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.0, encoder_layer_drop: float = 0.0, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build HuBERT model with "extra large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERT model with "extra large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
@@ -728,10 +707,7 @@ def hubert_pretrain_model(
     final_dim: int,
     feature_grad_mult: Optional[float],
 ) -> HuBERTPretrainModel:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_pretrain_model(extractor_mode: str, extractor_conv_layer_config: Optional[List[Tuple[int, int, int]]], extractor_conv_bias: bool, encoder_embed_dim: int, encoder_projection_dropout: float, encoder_pos_conv_kernel: int, encoder_pos_conv_groups: int, encoder_num_layers: int, encoder_num_heads: int, encoder_attention_dropout: float, encoder_ff_interm_features: int, encoder_ff_interm_dropout: float, encoder_dropout: float, encoder_layer_norm_first: bool, encoder_layer_drop: float, mask_prob: float, mask_selection: str, mask_other: float, mask_length: int, no_mask_overlap: bool, mask_min_space: int, mask_channel_prob: float, mask_channel_selection: str, mask_channel_other: float, mask_channel_length: int, no_mask_channel_overlap: bool, mask_channel_min_space: int, skip_masked: bool, skip_nomask: bool, num_classes: int, final_dim: int) -> torchaudio.models.HuBERTPretrainModel
-
-    Build a custom HuBERTPretrainModel for training from scratch
+    """Build a custom HuBERTPretrainModel for training from scratch

     Note:
         The "feature extractor" below corresponds to
@@ -999,10 +975,7 @@ def hubert_pretrain_base(
     feature_grad_mult: Optional[float] = 0.1,
     num_classes: int = 100,
 ) -> HuBERTPretrainModel:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_pretrain_base(encoder_projection_dropout: float = 0.1, encoder_attention_dropout: float = 0.1, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.1, encoder_layer_drop: float = 0.05, mask_prob: float = 0.8, mask_channel_prob: float = 0.0, mask_channel_length: int = 10, feature_grad_mult: Optional[float] = 0.1, num_classes: int = 100) -> torchaudio.models.HuBERTPretrainModel
-
-    Build HuBERTPretrainModel model with "base" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERTPretrainModel model with "base" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
@@ -1077,10 +1050,7 @@ def hubert_pretrain_large(
     mask_channel_length: int = 10,
     feature_grad_mult: Optional[float] = None,
 ) -> HuBERTPretrainModel:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_pretrain_large(encoder_projection_dropout: float = 0.0, encoder_attention_dropout: float = 0.0, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.0, encoder_layer_drop: float = 0.0, mask_prob: float = 0.8, mask_channel_prob: float = 0.0, mask_channel_length: int = 10, feature_grad_mult: Optional[float] = None) -> torchaudio.models.HuBERTPretrainModel
-
-    Build HuBERTPretrainModel model for pre-training with "large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERTPretrainModel model for pre-training with "large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
@@ -1153,10 +1123,7 @@ def hubert_pretrain_xlarge(
     mask_channel_length: int = 10,
     feature_grad_mult: Optional[float] = None,
 ) -> HuBERTPretrainModel:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_pretrain_xlarge(encoder_projection_dropout: float = 0.0, encoder_attention_dropout: float = 0.0, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.0, encoder_layer_drop: float = 0.0, mask_prob: float = 0.8, mask_channel_prob: float = 0.0, mask_channel_length: int = 10, feature_grad_mult: Optional[float] = None) -> torchaudio.models.HuBERTPretrainModel
-
-    Build HuBERTPretrainModel model for pre-training with "extra large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERTPretrainModel model for pre-training with "extra large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
......
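For orientation, the factories edited above are ordinary public constructors; in this release every parameter of the named-architecture variants has a default, so a bare call works. A short usage sketch (the feature-extraction call follows the Wav2Vec2Model API of this era):

```python
import torch
import torchaudio

# Build an untrained "base" wav2vec 2.0 model; all parameters default.
model = torchaudio.models.wav2vec2_base()

waveform = torch.randn(1, 16000)  # one second of dummy 16 kHz audio
features, lengths = model.extract_features(waveform)
print(len(features))  # one feature tensor per transformer layer
```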
@@ -125,10 +125,7 @@ def _convert_state_dict(state_dict):


 def import_fairseq_model(original: Module) -> Wav2Vec2Model:
-    # Overriding the signature so that the types are correct on Sphinx
-    """import_fairseq_model(original: torch.nn.Module) -> torchaudio.models.Wav2Vec2Model
-
-    Build Wav2Vec2Model from the corresponding model object of `fairseq`_.
+    """Build Wav2Vec2Model from the corresponding model object of `fairseq`_.

     Args:
         original (torch.nn.Module):
......
@@ -48,9 +48,7 @@ def _build(config, original):


 def import_huggingface_model(original: Module) -> Wav2Vec2Model:
-    """import_huggingface_model(original: torch.nn.Module) -> torchaudio.models.Wav2Vec2Model
-
-    Build Wav2Vec2Model from the corresponding model object of Hugging Face's `Transformers`_.
+    """Build Wav2Vec2Model from the corresponding model object of Hugging Face's `Transformers`_.

     Args:
         original (torch.nn.Module): An instance of ``Wav2Vec2ForCTC`` from ``transformers``.
......
@@ -155,18 +155,19 @@ class Tacotron2TTSBundle(ABC):
         See :func:`torchaudio.pipelines.Tacotron2TTSBundle.get_text_processor` for the usage.
         """

+        pass
+
     class Vocoder(_Vocoder):
         """Interface of the vocoder part of Tacotron2TTS pipeline

         See :func:`torchaudio.pipelines.Tacotron2TTSBundle.get_vocoder` for the usage.
         """

+        pass
+
     @abstractmethod
     def get_text_processor(self, *, dl_kwargs=None) -> TextProcessor:
-        # Overriding the signature so that the return type is correct on Sphinx
-        """get_text_processor(self, *, dl_kwargs=None) -> torchaudio.pipelines.Tacotron2TTSBundle.TextProcessor
-
-        Create a text processor
+        """Create a text processor

         For character-based pipeline, this processor splits the input text by character.
         For phoneme-based pipeline, this processor converts the input text (grapheme) to
@@ -235,10 +236,7 @@ class Tacotron2TTSBundle(ABC):

     @abstractmethod
     def get_vocoder(self, *, dl_kwargs=None) -> Vocoder:
-        # Overriding the signature so that the return type is correct on Sphinx
-        """get_vocoder(self, *, dl_kwargs=None) -> torchaudio.pipelines.Tacotron2TTSBundle.Vocoder
-
-        Create a vocoder module, based off of either WaveRNN or GriffinLim.
+        """Create a vocoder module, based off of either WaveRNN or GriffinLim.

         If a pre-trained weight file is necessary,
         :func:`torch.hub.load_state_dict_from_url` is used to downloaded it.
@@ -256,10 +254,7 @@ class Tacotron2TTSBundle(ABC):

     @abstractmethod
     def get_tacotron2(self, *, dl_kwargs=None) -> Tacotron2:
-        # Overriding the signature so that the return type is correct on Sphinx
-        """get_tacotron2(self, *, dl_kwargs=None) -> torchaudio.models.Tacotron2
-
-        Create a Tacotron2 model with pre-trained weight.
+        """Create a Tacotron2 model with pre-trained weight.

         Args:
             dl_kwargs (dictionary of keyword arguments):
......
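The three getters above compose into the full TTS pipeline. A brief usage sketch with a real bundle from this release (text and output handling kept minimal):

```python
import torch
import torchaudio

bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_CHAR_LJSPEECH

processor = bundle.get_text_processor()  # Tacotron2TTSBundle.TextProcessor
tacotron2 = bundle.get_tacotron2()       # torchaudio.models.Tacotron2
vocoder = bundle.get_vocoder()           # Tacotron2TTSBundle.Vocoder

tokens, lengths = processor("Hello world!")
with torch.inference_mode():
    spec, spec_lengths, _ = tacotron2.infer(tokens, lengths)
    waveforms, _ = vocoder(spec, spec_lengths)
```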
@@ -63,10 +63,7 @@ class Wav2Vec2Bundle:
         return state_dict

     def get_model(self, *, dl_kwargs=None) -> Wav2Vec2Model:
-        # Overriding the signature so that the return type is correct on Sphinx
-        """get_model(self, *, dl_kwargs=None) -> torchaudio.models.Wav2Vec2Model
-
-        Construct the model and load the pretrained weight.
+        """Construct the model and load the pretrained weight.

         The weight file is downloaded from the internet and cached with
         :func:`torch.hub.load_state_dict_from_url`
......
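Finally, a short usage sketch of the `get_model` documented above, using a real wav2vec 2.0 ASR bundle from this release; the first call downloads the weights and caches them via `torch.hub`:

```python
import torchaudio

bundle = torchaudio.pipelines.WAV2VEC2_ASR_BASE_960H
print(bundle.sample_rate)  # expected input sample rate (16000)

model = bundle.get_model()  # weights fetched via torch.hub.load_state_dict_from_url
```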