Unverified Commit 89aeb686 authored by moto's avatar moto Committed by GitHub
Browse files

Add filter bank figures (#1891)

parent 9e3778d2
...@@ -543,6 +543,13 @@ def melscale_fbanks( ...@@ -543,6 +543,13 @@ def melscale_fbanks(
) -> Tensor: ) -> Tensor:
r"""Create a frequency bin conversion matrix. r"""Create a frequency bin conversion matrix.
Note:
For the sake of numerical compatibility with librosa, not all the coefficients
in the resulting filter bank have a magnitude of 1.
.. image:: https://download.pytorch.org/torchaudio/doc-assets/mel_fbanks.png
:alt: Visualization of generated filter bank
Args: Args:
n_freqs (int): Number of frequencies to highlight/apply n_freqs (int): Number of frequencies to highlight/apply
f_min (float): Minimum frequency (Hz) f_min (float): Minimum frequency (Hz)
...@@ -559,6 +566,7 @@ def melscale_fbanks( ...@@ -559,6 +566,7 @@ def melscale_fbanks(
Each column is a filterbank so that assuming there is a matrix A of Each column is a filterbank so that assuming there is a matrix A of
size (..., ``n_freqs``), the applied result would be size (..., ``n_freqs``), the applied result would be
``A * melscale_fbanks(A.size(-1), ...)``. ``A * melscale_fbanks(A.size(-1), ...)``.
""" """
if norm is not None and norm != "slaney": if norm is not None and norm != "slaney":
...@@ -601,6 +609,13 @@ def linear_fbanks( ...@@ -601,6 +609,13 @@ def linear_fbanks(
) -> Tensor: ) -> Tensor:
r"""Creates a linear triangular filterbank. r"""Creates a linear triangular filterbank.
Note:
For the sake of numerical compatibility with librosa, not all the coefficients
in the resulting filter bank have a magnitude of 1.
.. image:: https://download.pytorch.org/torchaudio/doc-assets/lin_fbanks.png
:alt: Visualization of generated filter bank
Args: Args:
n_freqs (int): Number of frequencies to highlight/apply n_freqs (int): Number of frequencies to highlight/apply
f_min (float): Minimum frequency (Hz) f_min (float): Minimum frequency (Hz)
......
...@@ -344,6 +344,10 @@ class MelScale(torch.nn.Module): ...@@ -344,6 +344,10 @@ class MelScale(torch.nn.Module):
norm (str or None, optional): If 'slaney', divide the triangular mel weights by the width of the mel band norm (str or None, optional): If 'slaney', divide the triangular mel weights by the width of the mel band
(area normalization). (Default: ``None``) (area normalization). (Default: ``None``)
mel_scale (str, optional): Scale to use: ``htk`` or ``slaney``. (Default: ``htk``) mel_scale (str, optional): Scale to use: ``htk`` or ``slaney``. (Default: ``htk``)
See also:
:py:func:`torchaudio.functional.melscale_fbanks` - The function used to
generate the filter banks.
""" """
__constants__ = ['n_mels', 'sample_rate', 'f_min', 'f_max'] __constants__ = ['n_mels', 'sample_rate', 'f_min', 'f_max']
...@@ -483,8 +487,10 @@ class InverseMelScale(torch.nn.Module): ...@@ -483,8 +487,10 @@ class InverseMelScale(torch.nn.Module):
class MelSpectrogram(torch.nn.Module): class MelSpectrogram(torch.nn.Module):
r"""Create MelSpectrogram for a raw audio signal. This is a composition of Spectrogram r"""Create MelSpectrogram for a raw audio signal.
and MelScale.
This is a composition of :py:func:`torchaudio.transforms.Spectrogram`
and :py:func:`torchaudio.transforms.MelScale`.
Sources Sources
* https://gist.github.com/kastnerkyle/179d6e9a88202ab0a2fe * https://gist.github.com/kastnerkyle/179d6e9a88202ab0a2fe
...@@ -521,6 +527,10 @@ class MelSpectrogram(torch.nn.Module): ...@@ -521,6 +527,10 @@ class MelSpectrogram(torch.nn.Module):
>>> waveform, sample_rate = torchaudio.load('test.wav', normalize=True) >>> waveform, sample_rate = torchaudio.load('test.wav', normalize=True)
>>> transform = transforms.MelSpectrogram(sample_rate) >>> transform = transforms.MelSpectrogram(sample_rate)
>>> mel_specgram = transform(waveform) # (channel, n_mels, time) >>> mel_specgram = transform(waveform) # (channel, n_mels, time)
See also:
:py:func:`torchaudio.functional.melscale_fbanks` - The function used to
generate the filter banks.
""" """
__constants__ = ['sample_rate', 'n_fft', 'win_length', 'hop_length', 'pad', 'n_mels', 'f_min'] __constants__ = ['sample_rate', 'n_fft', 'win_length', 'hop_length', 'pad', 'n_mels', 'f_min']
...@@ -599,6 +609,10 @@ class MFCC(torch.nn.Module): ...@@ -599,6 +609,10 @@ class MFCC(torch.nn.Module):
norm (str, optional): norm to use. (Default: ``'ortho'``) norm (str, optional): norm to use. (Default: ``'ortho'``)
log_mels (bool, optional): whether to use log-mel spectrograms instead of db-scaled. (Default: ``False``) log_mels (bool, optional): whether to use log-mel spectrograms instead of db-scaled. (Default: ``False``)
melkwargs (dict or None, optional): arguments for MelSpectrogram. (Default: ``None``) melkwargs (dict or None, optional): arguments for MelSpectrogram. (Default: ``None``)
See also:
:py:func:`torchaudio.functional.melscale_fbanks` - The function used to
generate the filter banks.
""" """
__constants__ = ['sample_rate', 'n_mfcc', 'dct_type', 'top_db', 'log_mels'] __constants__ = ['sample_rate', 'n_mfcc', 'dct_type', 'top_db', 'log_mels']
...@@ -670,6 +684,11 @@ class LFCC(torch.nn.Module): ...@@ -670,6 +684,11 @@ class LFCC(torch.nn.Module):
norm (str, optional): norm to use. (Default: ``'ortho'``) norm (str, optional): norm to use. (Default: ``'ortho'``)
log_lf (bool, optional): whether to use log-lf spectrograms instead of db-scaled. (Default: ``False``) log_lf (bool, optional): whether to use log-lf spectrograms instead of db-scaled. (Default: ``False``)
speckwargs (dict or None, optional): arguments for Spectrogram. (Default: ``None``) speckwargs (dict or None, optional): arguments for Spectrogram. (Default: ``None``)
See also:
:py:func:`torchaudio.functional.linear_fbanks` - The function used to
generate the filter banks.
""" """
__constants__ = ['sample_rate', 'n_filter', 'n_lfcc', 'dct_type', 'top_db', 'log_lf'] __constants__ = ['sample_rate', 'n_filter', 'n_lfcc', 'dct_type', 'top_db', 'log_lf']
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment