Improve resampling documentation (#1519)

22fe8026 · Caroline Chen · GitHub · 7763ed87 · 22fe8026 · 22fe8026
Unverified Commit 22fe8026 authored May 20, 2021 by Caroline Chen Committed by GitHub May 20, 2021
Showing with 9 additions and 12 deletions

torchaudio/compliance/kaldi.py torchaudio/compliance/kaldi.py +2 -0

torchaudio/functional/functional.py torchaudio/functional/functional.py +4 -9

torchaudio/transforms.py torchaudio/transforms.py +3 -3

No files found.
--- a/torchaudio/compliance/kaldi.py
+++ b/torchaudio/compliance/kaldi.py
@@ -770,6 +770,8 @@ def resample_waveform(waveform: Tensor,
            but less efficient. We suggest around 4 to 10 for normal use. (Default: ``6``)
        rolloff (float, optional): The roll-off frequency of the filter, as a fraction of the Nyquist.
            Lower values reduce aliasing, but also reduce some of the highest frequencies. (Default: ``0.99``)
+        resampling_method (str, optional): The resampling method to use.
+            Options: [``sinc_interpolation``, ``kaiser_window``] (Default: ``'sinc_interpolation'``)
    Returns:
        Tensor: The waveform at the new frequency

--- a/torchaudio/functional/functional.py
+++ b/torchaudio/functional/functional.py
@@ -1424,26 +1424,21 @@ def resample(
        resampling_method: str = "sinc_interpolation",
        beta: Optional[float] = None,
 ) -> Tensor:
-    r"""Resamples the waveform at the new frequency. This matches Kaldi's OfflineFeatureTpl ResampleWaveform
+    r"""Resamples the waveform at the new frequency using bandlimited interpolation.
-    which uses a LinearResample (resample a signal at linearly spaced intervals to upsample/downsample
-    a signal). LinearResample (LR) means that the output signal is at linearly spaced intervals (i.e
-    the output signal has a frequency of ``new_freq``). It uses sinc/bandlimited interpolation to
-    upsample/downsample the signal.
    https://ccrma.stanford.edu/~jos/resample/Theory_Ideal_Bandlimited_Interpolation.html
-    https://github.com/kaldi-asr/kaldi/blob/master/src/feat/resample.h#L56
    Args:
        waveform (Tensor): The input signal of dimension (..., time)
        orig_freq (float): The original frequency of the signal
        new_freq (float): The desired frequency
        lowpass_filter_width (int, optional): Controls the sharpness of the filter, more == sharper
-            but less efficient. We suggest around 4 to 10 for normal use. (Default: ``6``)
+            but less efficient. (Default: ``6``)
        rolloff (float, optional): The roll-off frequency of the filter, as a fraction of the Nyquist.
            Lower values reduce aliasing, but also reduce some of the highest frequencies. (Default: ``0.99``)
-        resampling_method (str, optional): The resampling method.
+        resampling_method (str, optional): The resampling method to use.
            Options: [``sinc_interpolation``, ``kaiser_window``] (Default: ``'sinc_interpolation'``)
-        beta (float, optional): The shape parameter used for kaiser window.
+        beta (float or None): The shape parameter used for kaiser window.
    Returns:
        Tensor: The waveform at the new frequency of dimension (..., time).

--- a/torchaudio/transforms.py
+++ b/torchaudio/transforms.py
@@ -666,13 +666,13 @@ class Resample(torch.nn.Module):
    Args:
        orig_freq (float, optional): The original frequency of the signal. (Default: ``16000``)
        new_freq (float, optional): The desired frequency. (Default: ``16000``)
-        resampling_method (str, optional): The resampling method.
+        resampling_method (str, optional): The resampling method to use.
            Options: [``sinc_interpolation``, ``kaiser_window``] (Default: ``'sinc_interpolation'``)
        lowpass_filter_width (int, optional): Controls the sharpness of the filter, more == sharper
-            but less efficient. We suggest around 4 to 10 for normal use. (Default: ``6``)
+            but less efficient. (Default: ``6``)
        rolloff (float, optional): The roll-off frequency of the filter, as a fraction of the Nyquist.
            Lower values reduce aliasing, but also reduce some of the highest frequencies. (Default: ``0.99``)
-        beta (float, optional): The shape parameter used for kaiser window.
+        beta (float or None): The shape parameter used for kaiser window.
        Note: If resampling on waveforms of higher precision than float32, there may be a small loss of precision
        because the kernel is cached once as float32. If high precision resampling is important for your application,