Unverified Commit 9efc3503 authored by Tomás Osório, committed by GitHub

Add fade (#449)



* add basics for Fade

* add fade possibilities: at start, end or both

* add different types of fade

* add docstrings, add overriding possibility

* remove unnecessary logic

* correct typing

* agnostic to batch size or n_channels

* add batch test to Fade

* add transform to options

* add test_script_module

* add coherency with test batch

* remove extra step for waveform_length

* update docstring

* add test to compare fade with sox

* change name of fade_shape

* update test fade vs sox with new nomenclature for fade_shape

* add Documentation
Co-authored-by: Vincent QB <vincentqb@users.noreply.github.com>
parent e108fe2a
@@ -143,6 +143,7 @@ Transforms expect and return the following dimensions.
* `MuLawEncode`: (channel, time) -> (channel, time)
* `MuLawDecode`: (channel, time) -> (channel, time)
* `Resample`: (channel, time) -> (channel, time)
* `Fade`: (channel, time) -> (channel, time)
Complex numbers are supported via tensors of dimension (..., 2), and torchaudio provides `complex_norm` and `angle` to convert such a tensor into its magnitude and phase. Here, and in the documentation, we use an ellipsis "..." as a placeholder for the rest of the dimensions of a tensor, e.g. optional batching and channel dimensions.
......
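The new `Fade` entry follows the same `(channel, time)` convention as the other transforms, and the commit notes the implementation is agnostic to batch size and number of channels, since the fade masks broadcast over leading dimensions. A minimal usage sketch under that assumption (the file name and fade lengths are illustrative, not taken from this PR):

```python
import torchaudio

# Hypothetical input file; any (channel, time) waveform works.
waveform, sample_rate = torchaudio.load("example.wav")

fade = torchaudio.transforms.Fade(fade_in_len=44100, fade_out_len=44100)
faded = fade(waveform)                    # (channel, time) -> (channel, time)
batched = fade(waveform.repeat(3, 1, 1))  # a leading batch dimension is handled the same way
assert faded.shape == waveform.shape
```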
@@ -101,6 +101,13 @@ Transforms are common audio transforms. They can be chained together using :clas
.. automethod:: forward
:hidden:`Fade`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: Fade
.. automethod:: forward
:hidden:`FrequencyMasking`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
......
@@ -228,6 +228,22 @@ class Test_SoxEffectsChain(unittest.TestCase):
        with self.assertRaises(RuntimeError):
            E.sox_build_flow_effects()
    def test_fade(self):
        x_orig, _ = torchaudio.load(self.test_filepath)
        fade_in_len = 44100
        fade_out_len = 44100

        for fade_shape_sox, fade_shape_torchaudio in (("q", "quarter_sine"), ("h", "half_sine"), ("t", "linear")):
            E = torchaudio.sox_effects.SoxEffectsChain()
            E.set_input_file(self.test_filepath)
            E.append_effect_to_chain("fade", [fade_shape_sox, 1, "0", 1])
            x, sr = E.sox_build_flow_effects()

            fade = torchaudio.transforms.Fade(fade_in_len, fade_out_len, fade_shape_torchaudio)

            # check if effect worked
            self.assertTrue(x.allclose(fade(x_orig), rtol=1e-4, atol=1e-4))
if __name__ == '__main__':
    unittest.main()
@@ -519,6 +519,27 @@ class Tester(unittest.TestCase):
        tensor = torch.rand((10, 2, n_freq, 10, 2))
        _test_script_module(transforms.TimeStretch, tensor, n_freq=n_freq, hop_length=hop_length, fixed_rate=fixed_rate)
    def test_batch_Fade(self):
        waveform, sample_rate = torchaudio.load(self.test_filepath)
        fade_in_len = 3000
        fade_out_len = 3000

        # Single then transform then batch
        expected = transforms.Fade(fade_in_len, fade_out_len)(waveform).repeat(3, 1, 1)

        # Batch then transform
        computed = transforms.Fade(fade_in_len, fade_out_len)(waveform.repeat(3, 1, 1))

        self.assertTrue(computed.shape == expected.shape, (computed.shape, expected.shape))
        self.assertTrue(torch.allclose(computed, expected))

    def test_scriptmodule_Fade(self):
        waveform, sample_rate = torchaudio.load(self.test_filepath)
        fade_in_len = 3000
        fade_out_len = 3000
        _test_script_module(transforms.Fade, waveform, fade_in_len, fade_out_len)
    def test_scriptmodule_FrequencyMasking(self):
        tensor = torch.rand((10, 2, 50, 10, 2))
        _test_script_module(transforms.FrequencyMasking, tensor, freq_mask_param=60, iid_masks=False)
......
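`test_scriptmodule_Fade` exercises TorchScript compatibility; the `# type: (Tensor) -> Tensor` comment on `forward` in the new class helps TorchScript infer the signature. A rough sketch of the kind of check `_test_script_module` performs, assuming only the public `transforms.Fade` API shown in this diff (the input tensor is made up for illustration):

```python
import torch
from torchaudio import transforms

fade = transforms.Fade(fade_in_len=3000, fade_out_len=3000)
scripted = torch.jit.script(fade)  # should compile without error

waveform = torch.rand(2, 16000)    # illustrative (channel, time) input
assert torch.allclose(fade(waveform), scripted(waveform))
```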
@@ -22,6 +22,7 @@ __all__ = [
    'Resample',
    'ComplexNorm',
    'TimeStretch',
    'Fade',
    'FrequencyMasking',
    'TimeMasking',
]
@@ -639,6 +640,79 @@ class TimeStretch(torch.nn.Module):
        return F.phase_vocoder(complex_specgrams, rate, self.phase_advance)
class Fade(torch.nn.Module):
    r"""Add a fade in and/or fade out to a waveform.

    Args:
        fade_in_len (int, optional): Length of fade-in (time frames). (Default: ``0``)
        fade_out_len (int, optional): Length of fade-out (time frames). (Default: ``0``)
        fade_shape (str, optional): Shape of fade. Must be one of: "quarter_sine",
            "half_sine", "linear", "logarithmic", "exponential". (Default: ``"linear"``)
    """
    def __init__(self, fade_in_len=0, fade_out_len=0, fade_shape="linear"):
        super(Fade, self).__init__()
        self.fade_in_len = fade_in_len
        self.fade_out_len = fade_out_len
        self.fade_shape = fade_shape

    def forward(self, waveform):
        # type: (Tensor) -> Tensor
        r"""
        Args:
            waveform (torch.Tensor): Tensor of audio of dimension (..., time).

        Returns:
            torch.Tensor: Tensor of audio of dimension (..., time).
        """
        waveform_length = waveform.size()[-1]
        return self._fade_in(waveform_length) * self._fade_out(waveform_length) * waveform

    def _fade_in(self, waveform_length):
        # type: (int) -> Tensor
        fade = torch.linspace(0, 1, self.fade_in_len)
        ones = torch.ones(waveform_length - self.fade_in_len)

        if self.fade_shape == "linear":
            fade = fade

        if self.fade_shape == "exponential":
            fade = torch.pow(2, (fade - 1)) * fade

        if self.fade_shape == "logarithmic":
            fade = torch.log10(.1 + fade) + 1

        if self.fade_shape == "quarter_sine":
            fade = torch.sin(fade * math.pi / 2)

        if self.fade_shape == "half_sine":
            fade = torch.sin(fade * math.pi - math.pi / 2) / 2 + 0.5

        return torch.cat((fade, ones)).clamp_(0, 1)

    def _fade_out(self, waveform_length):
        # type: (int) -> Tensor
        fade = torch.linspace(0, 1, self.fade_out_len)
        ones = torch.ones(waveform_length - self.fade_out_len)

        if self.fade_shape == "linear":
            fade = - fade + 1

        if self.fade_shape == "exponential":
            fade = torch.pow(2, - fade) * (1 - fade)

        if self.fade_shape == "logarithmic":
            fade = torch.log10(1.1 - fade) + 1

        if self.fade_shape == "quarter_sine":
            fade = torch.sin(fade * math.pi / 2 + math.pi / 2)

        if self.fade_shape == "half_sine":
            fade = torch.sin(fade * math.pi + math.pi / 2) / 2 + 0.5

        return torch.cat((ones, fade)).clamp_(0, 1)
class _AxisMasking(torch.nn.Module):
    r"""Apply masking to a spectrogram.
......
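`forward` simply multiplies the waveform by two masks: `_fade_in` ramps from 0 to 1 over the first `fade_in_len` samples and stays at 1 afterwards, while `_fade_out` stays at 1 until the final `fade_out_len` samples and then ramps down to 0; `fade_shape` only changes the curvature of those ramps. A small sketch comparing the shapes on a constant signal (the signal and fade lengths are made up for illustration):

```python
import torch
from torchaudio import transforms

signal = torch.ones(1, 10)  # constant mono signal, 10 samples

for shape in ("linear", "exponential", "logarithmic", "quarter_sine", "half_sine"):
    fade = transforms.Fade(fade_in_len=5, fade_out_len=5, fade_shape=shape)
    out = fade(signal)
    # First and last samples are attenuated toward 0; the shape controls how quickly.
    print(shape, out[0])
```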