Unverified Commit 9efc3503 authored by Tomás Osório, committed by GitHub

Add fade (#449)



* add basics for Fade

* add fade possibilities: at start, end or both

* add different types of fade

* add docstrings, add overriding possibility

* remove unnecessary logic

* correct typing

* agnostic to batch size or n_channels

* add batch test to Fade

* add transform to options

* add test_script_module

* add coherency with test batch

* remove extra step for waveform_length

* update docstring

* add test to compare fade with sox

* change name of fade_shape

* update test fade vs sox with new nomenclature for fade_shape

* add Documentation
Co-authored-by: Vincent QB <vincentqb@users.noreply.github.com>
parent e108fe2a
@@ -143,6 +143,7 @@ Transforms expect and return the following dimensions.
* `MuLawEncode`: (channel, time) -> (channel, time)
* `MuLawDecode`: (channel, time) -> (channel, time)
* `Resample`: (channel, time) -> (channel, time)
* `Fade`: (channel, time) -> (channel, time)
Complex numbers are supported via tensors of dimension (..., 2), and torchaudio provides `complex_norm` and `angle` to convert such a tensor into its magnitude and phase. Here, and in the documentation, we use an ellipsis "..." as a placeholder for the rest of the dimensions of a tensor, e.g. optional batching and channel dimensions.
......
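The new `Fade` entry follows the same `(channel, time)` convention as the other transforms, and the commit notes the implementation is agnostic to batch size and number of channels, since the fade masks broadcast over leading dimensions. A minimal usage sketch under that assumption (the file name and fade lengths are illustrative, not taken from this PR):

```python
import torchaudio

# Hypothetical input file; any (channel, time) waveform works.
waveform, sample_rate = torchaudio.load("example.wav")

fade = torchaudio.transforms.Fade(fade_in_len=44100, fade_out_len=44100)
faded = fade(waveform)                    # (channel, time) -> (channel, time)
batched = fade(waveform.repeat(3, 1, 1))  # a leading batch dimension is handled the same way
assert faded.shape == waveform.shape
```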
@@ -101,6 +101,13 @@ Transforms are common audio transforms. They can be chained together using :clas
.. automethod:: forward
:hidden:`Fade`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: Fade
.. automethod:: forward
:hidden:`FrequencyMasking`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
......
@@ -228,6 +228,22 @@ class Test_SoxEffectsChain(unittest.TestCase):
        with self.assertRaises(RuntimeError):
            E.sox_build_flow_effects()
    def test_fade(self):
        x_orig, _ = torchaudio.load(self.test_filepath)
        fade_in_len = 44100
        fade_out_len = 44100

        for fade_shape_sox, fade_shape_torchaudio in (("q", "quarter_sine"), ("h", "half_sine"), ("t", "linear")):
            E = torchaudio.sox_effects.SoxEffectsChain()
            E.set_input_file(self.test_filepath)
            E.append_effect_to_chain("fade", [fade_shape_sox, 1, "0", 1])
            x, sr = E.sox_build_flow_effects()

            fade = torchaudio.transforms.Fade(fade_in_len, fade_out_len, fade_shape_torchaudio)

            # check if effect worked
            self.assertTrue(x.allclose(fade(x_orig), rtol=1e-4, atol=1e-4))
if __name__ == '__main__':
    unittest.main()
@@ -519,6 +519,27 @@ class Tester(unittest.TestCase):
        tensor = torch.rand((10, 2, n_freq, 10, 2))
        _test_script_module(transforms.TimeStretch, tensor, n_freq=n_freq, hop_length=hop_length, fixed_rate=fixed_rate)
    def test_batch_Fade(self):
        waveform, sample_rate = torchaudio.load(self.test_filepath)
        fade_in_len = 3000
        fade_out_len = 3000

        # Single then transform then batch
        expected = transforms.Fade(fade_in_len, fade_out_len)(waveform).repeat(3, 1, 1)

        # Batch then transform
        computed = transforms.Fade(fade_in_len, fade_out_len)(waveform.repeat(3, 1, 1))

        self.assertTrue(computed.shape == expected.shape, (computed.shape, expected.shape))
        self.assertTrue(torch.allclose(computed, expected))

    def test_scriptmodule_Fade(self):
        waveform, sample_rate = torchaudio.load(self.test_filepath)
        fade_in_len = 3000
        fade_out_len = 3000
        _test_script_module(transforms.Fade, waveform, fade_in_len, fade_out_len)
    def test_scriptmodule_FrequencyMasking(self):
        tensor = torch.rand((10, 2, 50, 10, 2))
        _test_script_module(transforms.FrequencyMasking, tensor, freq_mask_param=60, iid_masks=False)
......
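`test_scriptmodule_Fade` exercises TorchScript compatibility; the `# type: (Tensor) -> Tensor` comment on `forward` in the new class helps TorchScript infer the signature. A rough sketch of the kind of check `_test_script_module` performs, assuming only the public `transforms.Fade` API shown in this diff (the input tensor is made up for illustration):

```python
import torch
from torchaudio import transforms

fade = transforms.Fade(fade_in_len=3000, fade_out_len=3000)
scripted = torch.jit.script(fade)  # should compile without error

waveform = torch.rand(2, 16000)    # illustrative (channel, time) input
assert torch.allclose(fade(waveform), scripted(waveform))
```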
@@ -22,6 +22,7 @@ __all__ = [
    'Resample',
    'ComplexNorm',
    'TimeStretch',
    'Fade',
    'FrequencyMasking',
    'TimeMasking',
]
@@ -639,6 +640,79 @@ class TimeStretch(torch.nn.Module):
        return F.phase_vocoder(complex_specgrams, rate, self.phase_advance)
class Fade(torch.nn.Module):
    r"""Add a fade in and/or fade out to a waveform.

    Args:
        fade_in_len (int, optional): Length of fade-in (time frames). (Default: ``0``)
        fade_out_len (int, optional): Length of fade-out (time frames). (Default: ``0``)
        fade_shape (str, optional): Shape of fade. Must be one of: "quarter_sine",
            "half_sine", "linear", "logarithmic", "exponential". (Default: ``"linear"``)
    """
    def __init__(self, fade_in_len=0, fade_out_len=0, fade_shape="linear"):
        super(Fade, self).__init__()
        self.fade_in_len = fade_in_len
        self.fade_out_len = fade_out_len
        self.fade_shape = fade_shape

    def forward(self, waveform):
        # type: (Tensor) -> Tensor
        r"""
        Args:
            waveform (torch.Tensor): Tensor of audio of dimension (..., time).

        Returns:
            torch.Tensor: Tensor of audio of dimension (..., time).
        """
        waveform_length = waveform.size()[-1]
        return self._fade_in(waveform_length) * self._fade_out(waveform_length) * waveform

    def _fade_in(self, waveform_length):
        # type: (int) -> Tensor
        fade = torch.linspace(0, 1, self.fade_in_len)
        ones = torch.ones(waveform_length - self.fade_in_len)

        if self.fade_shape == "linear":
            fade = fade

        if self.fade_shape == "exponential":
            fade = torch.pow(2, (fade - 1)) * fade

        if self.fade_shape == "logarithmic":
            fade = torch.log10(.1 + fade) + 1

        if self.fade_shape == "quarter_sine":
            fade = torch.sin(fade * math.pi / 2)

        if self.fade_shape == "half_sine":
            fade = torch.sin(fade * math.pi - math.pi / 2) / 2 + 0.5

        return torch.cat((fade, ones)).clamp_(0, 1)

    def _fade_out(self, waveform_length):
        # type: (int) -> Tensor
        fade = torch.linspace(0, 1, self.fade_out_len)
        ones = torch.ones(waveform_length - self.fade_out_len)

        if self.fade_shape == "linear":
            fade = - fade + 1

        if self.fade_shape == "exponential":
            fade = torch.pow(2, - fade) * (1 - fade)

        if self.fade_shape == "logarithmic":
            fade = torch.log10(1.1 - fade) + 1

        if self.fade_shape == "quarter_sine":
            fade = torch.sin(fade * math.pi / 2 + math.pi / 2)

        if self.fade_shape == "half_sine":
            fade = torch.sin(fade * math.pi + math.pi / 2) / 2 + 0.5

        return torch.cat((ones, fade)).clamp_(0, 1)
class _AxisMasking(torch.nn.Module):
    r"""Apply masking to a spectrogram.
......
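`forward` simply multiplies the waveform by two masks: `_fade_in` ramps from 0 to 1 over the first `fade_in_len` samples and stays at 1 afterwards, while `_fade_out` stays at 1 until the final `fade_out_len` samples and then ramps down to 0; `fade_shape` only changes the curvature of those ramps. A small sketch comparing the shapes on a constant signal (the signal and fade lengths are made up for illustration):

```python
import torch
from torchaudio import transforms

signal = torch.ones(1, 10)  # constant mono signal, 10 samples

for shape in ("linear", "exponential", "logarithmic", "quarter_sine", "half_sine"):
    fade = transforms.Fade(fade_in_len=5, fade_out_len=5, fade_shape=shape)
    out = fade(signal)
    # First and last samples are attenuated toward 0; the shape controls how quickly.
    print(shape, out[0])
```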