Commit 8a0d7b36 authored by moto, committed by Facebook GitHub Bot

Tweak documentation (#2656)

Summary:
1. Override the `__module__` attribute of re-exported classes in `conf.py` so that no manual signature override is necessary in docstrings
2. Fix how the `SourceSeparationBundle` member attribute `sample_rate` is documented

Pull Request resolved: https://github.com/pytorch/audio/pull/2656

Reviewed By: carolineechen

Differential Revision: D39293053

Pulled By: mthrok

fbshipit-source-id: 2b8d6be1aee517d0e692043c26ac2438a787adc6
parent 3430fd68
@@ -17,6 +17,7 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
+import importlib
 import os
 import sys
@@ -348,7 +349,38 @@ def inject_minigalleries(app, what, name, obj, options, lines):
         lines.append("\n")


+# Overwrite the imported classes
+def fix_module_path(module, attribute):
+    attr = importlib.import_module(module)
+    for attr_ in attribute:
+        attr = getattr(attr, attr_)
+    attr.__module__ = module
+
+
+def fix_aliases():
+    patterns = {
+        "torchaudio.models": [
+            ["HuBERTPretrainModel"],
+            ["Wav2Vec2Model"],
+            ["RNNT"],
+            ["Tacotron2"],
+        ],
+        "torchaudio.pipelines": [
+            ["Tacotron2TTSBundle"],
+            ["Tacotron2TTSBundle", "TextProcessor"],
+            ["Tacotron2TTSBundle", "Vocoder"],
+        ],
+    }
+    for module, attributes in patterns.items():
+        for attribute in attributes:
+            fix_module_path(module, attribute)
+
+    if importlib.util.find_spec("torchaudio.flashlight_lib_text_decoder") is not None:
+        fix_module_path("torchaudio.models.decoder", ["CTCHypothesis"])
+
+
 def setup(app):
+    fix_aliases()
     app.connect("autodoc-process-docstring", inject_minigalleries)
...
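Context for this hook: autodoc derives cross-reference targets and rendered signatures from a class's `__module__`, which normally names the private defining module rather than the public alias. A minimal sketch of the effect (assumes torchaudio is importable; the private path shown is an assumption about its internal layout):

import torchaudio.models

cls = torchaudio.models.Wav2Vec2Model
print(cls.__module__)  # e.g. "torchaudio.models.wav2vec2.model" (private defining module)

# What fix_module_path applies at docs-build time:
cls.__module__ = "torchaudio.models"
print(cls.__module__)  # "torchaudio.models" -- Sphinx now documents the public path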
@@ -32,10 +32,10 @@ SourceSeparationBundle
 ~~~~~~~~~~~~~~~~~~~~~~

 .. autoclass:: SourceSeparationBundle
+   :members: sample_rate

    .. automethod:: get_model

-   .. automethod:: sample_rate

 CONVTASNET_BASE_LIBRI2MIX
 ~~~~~~~~~~~~~~~~~~~~~~~~~
...
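The `.rst` change above reflects that `sample_rate` is a property, not a bound method, so `.. automethod::` is the wrong directive for it; listing it under `:members:` lets autodoc document it as an attribute. A minimal sketch of the distinction (the bundle shape here is illustrative, not torchaudio's exact definition):

from dataclasses import dataclass

@dataclass
class Bundle:  # illustrative stand-in for SourceSeparationBundle
    _sample_rate: int

    @property
    def sample_rate(self) -> int:
        """Sample rate of the audio that the model is trained on."""
        return self._sample_rate

    def get_model(self):
        """Construct the model and load the pretrained weight."""

print(callable(Bundle.get_model))                            # True: a method, automethod applies
print(isinstance(Bundle.__dict__["sample_rate"], property))  # True: an attribute, use :members: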
@@ -190,10 +190,7 @@ class CTCDecoder:
     def __call__(
         self, emissions: torch.FloatTensor, lengths: Optional[torch.Tensor] = None
     ) -> List[List[CTCHypothesis]]:
-        # Overriding the signature so that the return type is correct on Sphinx
-        """__call__(self, emissions: torch.FloatTensor, lengths: Optional[torch.Tensor] = None) -> \
-List[List[torchaudio.models.decoder.CTCHypothesis]]
-
+        """
         Args:
             emissions (torch.FloatTensor): CPU tensor of shape `(batch, frame, num_tokens)` storing sequences of
                 probability distribution over labels; output of acoustic model.
...
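The deleted signature block existed only so the rendered return type would read `torchaudio.models.decoder.CTCHypothesis` rather than the private defining module. The `conf.py` hook now handles this, guarded by the same flashlight availability check; a sketch of what it rewrites (the private module name is an assumption):

import importlib.util

if importlib.util.find_spec("torchaudio.flashlight_lib_text_decoder") is not None:
    from torchaudio.models.decoder import CTCHypothesis

    print(CTCHypothesis.__module__)  # private path, e.g. "torchaudio.models.decoder._ctc_decoder"
    CTCHypothesis.__module__ = "torchaudio.models.decoder"  # what fix_module_path applies
    print(CTCHypothesis.__module__)  # "torchaudio.models.decoder"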
@@ -237,10 +237,7 @@ def wav2vec2_model(
     encoder_layer_drop: float,
     aux_num_out: Optional[int],
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """wav2vec2_model(extractor_mode: str, extractor_conv_layer_config: Optional[List[Tuple[int, int, int]]], extractor_conv_bias: bool, encoder_embed_dim: int, encoder_projection_dropout: float, encoder_pos_conv_kernel: int, encoder_pos_conv_groups: int, encoder_num_layers: int, encoder_num_heads: int, encoder_attention_dropout: float, encoder_ff_interm_features: int, encoder_ff_interm_dropout: float, encoder_dropout: float, encoder_layer_norm_first: bool, encoder_layer_drop: float, aux_num_out: Optional[int]) -> torchaudio.models.Wav2Vec2Model
-
-    Build a custom Wav2Vec2Model
+    """Build a custom Wav2Vec2Model

     Note:
         The "feature extractor" below corresponds to
@@ -396,10 +393,7 @@ def wav2vec2_base(
     encoder_layer_drop: float = 0.1,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """wav2vec2_base(encoder_projection_dropout: float = 0.1, encoder_attention_dropout: float = 0.1, encoder_ff_interm_dropout: float = 0.1, encoder_dropout: float = 0.1, encoder_layer_drop: float = 0.1, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build Wav2Vec2Model with "base" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]
+    """Build Wav2Vec2Model with "base" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]

     Args:
         encoder_projection_dropout (float):
@@ -447,10 +441,7 @@ def wav2vec2_large(
     encoder_layer_drop: float = 0.1,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """wav2vec2_large(encoder_projection_dropout: float = 0.1, encoder_attention_dropout: float = 0.1, encoder_ff_interm_dropout: float = 0.1, encoder_dropout: float = 0.1, encoder_layer_drop: float = 0.1, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build Wav2Vec2Model with "large" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]
+    """Build Wav2Vec2Model with "large" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]

     Args:
         encoder_projection_dropout (float):
@@ -498,10 +489,7 @@ def wav2vec2_large_lv60k(
     encoder_layer_drop: float = 0.1,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """wav2vec2_large_lv60k( encoder_projection_dropout: float = 0.1, encoder_attention_dropout: float = 0.0, encoder_ff_interm_dropout: float = 0.1, encoder_dropout: float = 0.0, encoder_layer_drop: float = 0.1, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build Wav2Vec2Model with "large lv-60k" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]
+    """Build Wav2Vec2Model with "large lv-60k" architecture from *wav2vec 2.0* [:footcite:`baevski2020wav2vec`]

     Args:
         encoder_projection_dropout (float):
@@ -549,10 +537,7 @@ def hubert_base(
     encoder_layer_drop: float = 0.05,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_base(encoder_projection_dropout: float = 0.1, encoder_attention_dropout: float = 0.1, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.1, encoder_layer_drop: float = 0.05, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build HuBERT model with "base" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERT model with "base" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
@@ -600,10 +585,7 @@ def hubert_large(
     encoder_layer_drop: float = 0.0,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_large(encoder_projection_dropout: float = 0.0, encoder_attention_dropout: float = 0.0, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.0, encoder_layer_drop: float = 0.0, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build HuBERT model with "large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERT model with "large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
@@ -651,10 +633,7 @@ def hubert_xlarge(
     encoder_layer_drop: float = 0.0,
     aux_num_out: Optional[int] = None,
 ) -> Wav2Vec2Model:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_xlarge(encoder_projection_dropout: float = 0.0, encoder_attention_dropout: float = 0.0, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.0, encoder_layer_drop: float = 0.0, aux_num_out: Optional[int] = None) -> torchaudio.models.Wav2Vec2Model
-
-    Build HuBERT model with "extra large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERT model with "extra large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
@@ -728,10 +707,7 @@ def hubert_pretrain_model(
     final_dim: int,
     feature_grad_mult: Optional[float],
 ) -> HuBERTPretrainModel:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_pretrain_model(extractor_mode: str, extractor_conv_layer_config: Optional[List[Tuple[int, int, int]]], extractor_conv_bias: bool, encoder_embed_dim: int, encoder_projection_dropout: float, encoder_pos_conv_kernel: int, encoder_pos_conv_groups: int, encoder_num_layers: int, encoder_num_heads: int, encoder_attention_dropout: float, encoder_ff_interm_features: int, encoder_ff_interm_dropout: float, encoder_dropout: float, encoder_layer_norm_first: bool, encoder_layer_drop: float, mask_prob: float, mask_selection: str, mask_other: float, mask_length: int, no_mask_overlap: bool, mask_min_space: int, mask_channel_prob: float, mask_channel_selection: str, mask_channel_other: float, mask_channel_length: int, no_mask_channel_overlap: bool, mask_channel_min_space: int, skip_masked: bool, skip_nomask: bool, num_classes: int, final_dim: int) -> torchaudio.models.HuBERTPretrainModel
-
-    Build a custom HuBERTPretrainModel for training from scratch
+    """Build a custom HuBERTPretrainModel for training from scratch

     Note:
         The "feature extractor" below corresponds to
@@ -999,10 +975,7 @@ def hubert_pretrain_base(
     feature_grad_mult: Optional[float] = 0.1,
     num_classes: int = 100,
 ) -> HuBERTPretrainModel:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_pretrain_base(encoder_projection_dropout: float = 0.1, encoder_attention_dropout: float = 0.1, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.1, encoder_layer_drop: float = 0.05, mask_prob: float = 0.8, mask_channel_prob: float = 0.0, mask_channel_length: int = 10, feature_grad_mult: Optional[float] = 0.1, num_classes: int = 100) -> torchaudio.models.HuBERTPretrainModel
-
-    Build HuBERTPretrainModel model with "base" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERTPretrainModel model with "base" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
@@ -1077,10 +1050,7 @@ def hubert_pretrain_large(
     mask_channel_length: int = 10,
     feature_grad_mult: Optional[float] = None,
 ) -> HuBERTPretrainModel:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_pretrain_large(encoder_projection_dropout: float = 0.0, encoder_attention_dropout: float = 0.0, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.0, encoder_layer_drop: float = 0.0, mask_prob: float = 0.8, mask_channel_prob: float = 0.0, mask_channel_length: int = 10, feature_grad_mult: Optional[float] = None) -> torchaudio.models.HuBERTPretrainModel
-
-    Build HuBERTPretrainModel model for pre-training with "large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERTPretrainModel model for pre-training with "large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
@@ -1153,10 +1123,7 @@ def hubert_pretrain_xlarge(
     mask_channel_length: int = 10,
     feature_grad_mult: Optional[float] = None,
 ) -> HuBERTPretrainModel:
-    # Overriding the signature so that the return type is correct on Sphinx
-    """hubert_pretrain_xlarge(encoder_projection_dropout: float = 0.0, encoder_attention_dropout: float = 0.0, encoder_ff_interm_dropout: float = 0.0, encoder_dropout: float = 0.0, encoder_layer_drop: float = 0.0, mask_prob: float = 0.8, mask_channel_prob: float = 0.0, mask_channel_length: int = 10, feature_grad_mult: Optional[float] = None) -> torchaudio.models.HuBERTPretrainModel
-
-    Build HuBERTPretrainModel model for pre-training with "extra large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]
+    """Build HuBERTPretrainModel model for pre-training with "extra large" architecture from *HuBERT* [:footcite:`hsu2021hubert`]

     Args:
         encoder_projection_dropout (float):
...
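Every factory-function hunk above is the same mechanical change: drop the hand-maintained signature copy from the docstring and let autodoc read the real annotations. A quick sketch confirming the genuine signature remains introspectable (assumes torchaudio is installed):

import inspect
import torchaudio

# Sphinx can render this directly; no hand-written copy is needed in the docstring.
print(inspect.signature(torchaudio.models.wav2vec2_base))

model = torchaudio.models.wav2vec2_base(aux_num_out=32)
print(type(model).__name__)  # Wav2Vec2Model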
@@ -125,10 +125,7 @@ def _convert_state_dict(state_dict):

 def import_fairseq_model(original: Module) -> Wav2Vec2Model:
-    # Overriding the signature so that the types are correct on Sphinx
-    """import_fairseq_model(original: torch.nn.Module) -> torchaudio.models.Wav2Vec2Model
-
-    Build Wav2Vec2Model from the corresponding model object of `fairseq`_.
+    """Build Wav2Vec2Model from the corresponding model object of `fairseq`_.

     Args:
         original (torch.nn.Module):
...
@@ -48,9 +48,7 @@ def _build(config, original):

 def import_huggingface_model(original: Module) -> Wav2Vec2Model:
-    """import_huggingface_model(original: torch.nn.Module) -> torchaudio.models.Wav2Vec2Model
-
-    Build Wav2Vec2Model from the corresponding model object of Hugging Face's `Transformers`_.
+    """Build Wav2Vec2Model from the corresponding model object of Hugging Face's `Transformers`_.

     Args:
         original (torch.nn.Module): An instance of ``Wav2Vec2ForCTC`` from ``transformers``.
...
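The import helpers themselves are unchanged; only the docstring headers were trimmed. Typical usage, for reference (requires `transformers` to be installed; the checkpoint name is illustrative):

from torchaudio.models.wav2vec2.utils import import_huggingface_model
from transformers import Wav2Vec2ForCTC

original = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
model = import_huggingface_model(original)  # torchaudio.models.Wav2Vec2Model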
@@ -155,18 +155,19 @@ class Tacotron2TTSBundle(ABC):

         See :func:`torchaudio.pipelines.Tacotron2TTSBundle.get_text_processor` for the usage.
         """

+        pass
+
     class Vocoder(_Vocoder):
         """Interface of the vocoder part of Tacotron2TTS pipeline

         See :func:`torchaudio.pipelines.Tacotron2TTSBundle.get_vocoder` for the usage.
         """

+        pass
+
     @abstractmethod
     def get_text_processor(self, *, dl_kwargs=None) -> TextProcessor:
-        # Overriding the signature so that the return type is correct on Sphinx
-        """get_text_processor(self, *, dl_kwargs=None) -> torchaudio.pipelines.Tacotron2TTSBundle.TextProcessor
-
-        Create a text processor
+        """Create a text processor

         For character-based pipeline, this processor splits the input text by character.
         For phoneme-based pipeline, this processor converts the input text (grapheme) to
@@ -235,10 +236,7 @@
     @abstractmethod
     def get_vocoder(self, *, dl_kwargs=None) -> Vocoder:
-        # Overriding the signature so that the return type is correct on Sphinx
-        """get_vocoder(self, *, dl_kwargs=None) -> torchaudio.pipelines.Tacotron2TTSBundle.Vocoder
-
-        Create a vocoder module, based off of either WaveRNN or GriffinLim.
+        """Create a vocoder module, based off of either WaveRNN or GriffinLim.

         If a pre-trained weight file is necessary,
         :func:`torch.hub.load_state_dict_from_url` is used to downloaded it.
@@ -256,10 +254,7 @@ class Tacotron2TTSBundle(ABC):
     @abstractmethod
     def get_tacotron2(self, *, dl_kwargs=None) -> Tacotron2:
-        # Overriding the signature so that the return type is correct on Sphinx
-        """get_tacotron2(self, *, dl_kwargs=None) -> torchaudio.models.Tacotron2
-
-        Create a Tacotron2 model with pre-trained weight.
+        """Create a Tacotron2 model with pre-trained weight.

         Args:
             dl_kwargs (dictionary of keyword arguments):
...
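The three getters above are exactly what the `conf.py` rewrite of `Tacotron2TTSBundle`, `TextProcessor`, and `Vocoder` targets. Typical use of a concrete bundle, for reference (assumes torchaudio is installed; pretrained weights are downloaded on first call):

import torchaudio

bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_PHONE_LJSPEECH
processor = bundle.get_text_processor()  # Tacotron2TTSBundle.TextProcessor
tacotron2 = bundle.get_tacotron2()       # torchaudio.models.Tacotron2
vocoder = bundle.get_vocoder()           # Tacotron2TTSBundle.Vocoder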
@@ -63,10 +63,7 @@ class Wav2Vec2Bundle:
         return state_dict

     def get_model(self, *, dl_kwargs=None) -> Wav2Vec2Model:
-        # Overriding the signature so that the return type is correct on Sphinx
-        """get_model(self, *, dl_kwargs=None) -> torchaudio.models.Wav2Vec2Model
-
-        Construct the model and load the pretrained weight.
+        """Construct the model and load the pretrained weight.

         The weight file is downloaded from the internet and cached with
         :func:`torch.hub.load_state_dict_from_url`
...
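And the corresponding usage of `Wav2Vec2Bundle.get_model`, where the weight file is fetched and cached via `torch.hub.load_state_dict_from_url`:

import torchaudio

bundle = torchaudio.pipelines.WAV2VEC2_ASR_BASE_960H
model = bundle.get_model()  # torchaudio.models.Wav2Vec2Model
print(bundle.sample_rate)   # 16000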