Add band,treble,deemph,riaa to functional.py (#470)

df9a0417 · Bhargav Kathivarapu · GitHub · 11fb22aa · df9a0417 · df9a0417
Unverified Commit df9a0417 authored Mar 24, 2020 by Bhargav Kathivarapu Committed by GitHub Mar 24, 2020
Hide whitespace changes
Inline Side-by-side

Showing with 283 additions and 0 deletions

test/test_functional_filtering.py test/test_functional_filtering.py +97 -0

torchaudio/functional.py torchaudio/functional.py +186 -0

No files found.
--- a/test/test_functional_filtering.py
+++ b/test/test_functional_filtering.py
@@ -230,6 +230,103 @@ class TestFunctionalFiltering(unittest.TestCase):
        assert torch.allclose(sox_output_waveform, output_waveform, atol=1e-4)
        _test_torchscript_functional(F.bandreject_biquad, waveform, sample_rate, CENTRAL_FREQ, Q)
+    def test_band_with_noise(self):
+        """
+        Check F.band_biquad with noise mode enabled against the SoX ``band -n`` effect
+        """
+        center_hz = 1000
+        q_value = 0.707
+        noise_mode = True
+        asset_path = os.path.join(self.test_dirpath, "assets", "whitenoise.mp3")
+        # Reference output: run the asset through the SoX effects chain.
+        sox_chain = torchaudio.sox_effects.SoxEffectsChain()
+        sox_chain.set_input_file(asset_path)
+        sox_chain.append_effect_to_chain("band", ["-n", center_hz, str(q_value) + 'q'])
+        expected, _ = sox_chain.sox_build_flow_effects()
+        # Candidate output: same asset through the functional implementation.
+        waveform, sample_rate = torchaudio.load(asset_path, normalization=True)
+        actual = F.band_biquad(waveform, sample_rate, center_hz, q_value, noise_mode)
+        assert torch.allclose(expected, actual, atol=1e-4)
+        _test_torchscript_functional(F.band_biquad, waveform, sample_rate, center_hz, q_value, noise_mode)
+    def test_band_without_noise(self):
+        """
+        Check F.band_biquad with noise mode disabled against the SoX ``band`` effect
+        """
+        center_hz = 1000
+        q_value = 0.707
+        noise_mode = False
+        asset_path = os.path.join(self.test_dirpath, "assets", "whitenoise.mp3")
+        # Reference output: run the asset through the SoX effects chain.
+        sox_chain = torchaudio.sox_effects.SoxEffectsChain()
+        sox_chain.set_input_file(asset_path)
+        sox_chain.append_effect_to_chain("band", [center_hz, str(q_value) + 'q'])
+        expected, _ = sox_chain.sox_build_flow_effects()
+        # Candidate output: same asset through the functional implementation.
+        waveform, sample_rate = torchaudio.load(asset_path, normalization=True)
+        actual = F.band_biquad(waveform, sample_rate, center_hz, q_value, noise_mode)
+        assert torch.allclose(expected, actual, atol=1e-4)
+        _test_torchscript_functional(F.band_biquad, waveform, sample_rate, center_hz, q_value, noise_mode)
+    def test_treble(self):
+        """
+        Check F.treble_biquad against the SoX ``treble`` effect
+        """
+        center_hz = 1000
+        q_value = 0.707
+        gain_db = 40
+        asset_path = os.path.join(self.test_dirpath, "assets", "whitenoise.mp3")
+        # Reference output: run the asset through the SoX effects chain.
+        sox_chain = torchaudio.sox_effects.SoxEffectsChain()
+        sox_chain.set_input_file(asset_path)
+        sox_chain.append_effect_to_chain("treble", [gain_db, center_hz, str(q_value) + 'q'])
+        expected, _ = sox_chain.sox_build_flow_effects()
+        # Candidate output: same asset through the functional implementation.
+        waveform, sample_rate = torchaudio.load(asset_path, normalization=True)
+        actual = F.treble_biquad(waveform, sample_rate, gain_db, center_hz, q_value)
+        assert torch.allclose(expected, actual, atol=1e-4)
+        _test_torchscript_functional(F.treble_biquad, waveform, sample_rate, gain_db, center_hz, q_value)
+    def test_deemph(self):
+        """
+        Check F.deemph_biquad against the SoX ``deemph`` effect
+        """
+        asset_path = os.path.join(self.test_dirpath, "assets", "whitenoise.mp3")
+        # Reference output: run the asset through the SoX effects chain.
+        sox_chain = torchaudio.sox_effects.SoxEffectsChain()
+        sox_chain.set_input_file(asset_path)
+        sox_chain.append_effect_to_chain("deemph")
+        expected, _ = sox_chain.sox_build_flow_effects()
+        # Candidate output: same asset through the functional implementation.
+        waveform, sample_rate = torchaudio.load(asset_path, normalization=True)
+        actual = F.deemph_biquad(waveform, sample_rate)
+        assert torch.allclose(expected, actual, atol=1e-4)
+        _test_torchscript_functional(F.deemph_biquad, waveform, sample_rate)
+    def test_riaa(self):
+        """
+        Check F.riaa_biquad against the SoX ``riaa`` effect
+        """
+        asset_path = os.path.join(self.test_dirpath, "assets", "whitenoise.mp3")
+        # Reference output: run the asset through the SoX effects chain.
+        sox_chain = torchaudio.sox_effects.SoxEffectsChain()
+        sox_chain.set_input_file(asset_path)
+        sox_chain.append_effect_to_chain("riaa")
+        expected, _ = sox_chain.sox_build_flow_effects()
+        # Candidate output: same asset through the functional implementation.
+        waveform, sample_rate = torchaudio.load(asset_path, normalization=True)
+        actual = F.riaa_biquad(waveform, sample_rate)
+        assert torch.allclose(expected, actual, atol=1e-4)
+        _test_torchscript_functional(F.riaa_biquad, waveform, sample_rate)
    def test_equalizer(self):
        """
        Test biquad peaking equalizer filter, compare to SoX implementation

--- a/torchaudio/functional.py
+++ b/torchaudio/functional.py
@@ -26,6 +26,10 @@ __all__ = [
    "bandpass_biquad",
    "bandreject_biquad",
    "equalizer_biquad",
+    "band_biquad",
+    "treble_biquad",
+    "deemph_biquad",
+    "riaa_biquad",
    "biquad",
    'mask_along_axis',
    'mask_along_axis_iid'
@@ -912,6 +916,188 @@ def equalizer_biquad(waveform, sample_rate, center_freq, gain, Q=0.707):
    return biquad(waveform, b0, b1, b2, a0, a1, a2)
+def band_biquad(waveform, sample_rate, central_freq, Q=0.707, noise=False):
+    # type: (Tensor, int, float, float, bool) -> Tensor
+    r"""Design two-pole band filter.  Similar to SoX implementation.
+    Args:
+        waveform (torch.Tensor): audio waveform of dimension of `(..., time)`
+        sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz)
+        central_freq (float): central frequency (in Hz)
+        Q (float): https://en.wikipedia.org/wiki/Q_factor (Default: ``0.707``)
+        noise (bool) : If ``True``, uses the alternate mode for un-pitched audio (e.g. percussion).
+            If ``False``, uses mode oriented to pitched audio, i.e. voice, singing,
+            or instrumental music. (Default: ``False``)
+    Returns:
+        output_waveform (torch.Tensor): Dimension of `(..., time)`
+    References:
+        http://sox.sourceforge.net/sox.html
+        https://www.w3.org/2011/audio/audio-eq-cookbook.html#APF
+    """
+    w0 = 2 * math.pi * central_freq / sample_rate
+    # Bandwidth in Hz implied by the quality factor.
+    bw_Hz = central_freq / Q
+    a0 = 1.
+    a2 = math.exp(-2 * math.pi * bw_Hz / sample_rate)
+    a1 = -4 * a2 / (1 + a2) * math.cos(w0)
+    b0 = math.sqrt(1 - a1 * a1 / (4 * a2)) * (1 - a2)
+    if noise:
+        # Noise mode rescales the single feed-forward coefficient.
+        mult = math.sqrt(((1 + a2) * (1 + a2) - a1 * a1) * (1 - a2) / (1 + a2)) / b0
+        b0 *= mult
+    # Only b0 is non-zero on the feed-forward side.
+    b1 = 0.
+    b2 = 0.
+    return biquad(waveform, b0, b1, b2, a0, a1, a2)
+def treble_biquad(waveform, sample_rate, gain, central_freq=3000, Q=0.707):
+    # type: (Tensor, int, float, float, float) -> Tensor
+    r"""Design a treble tone-control effect.  Similar to SoX implementation.
+    Args:
+        waveform (torch.Tensor): audio waveform of dimension of `(..., time)`
+        sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz)
+        gain (float): desired gain at the boost (or attenuation) in dB.
+        central_freq (float): central frequency (in Hz). (Default: ``3000``)
+        Q (float): https://en.wikipedia.org/wiki/Q_factor (Default: ``0.707``)
+    Returns:
+        output_waveform (torch.Tensor): Dimension of `(..., time)`
+    References:
+        http://sox.sourceforge.net/sox.html
+        https://www.w3.org/2011/audio/audio-eq-cookbook.html#APF
+    """
+    w0 = 2 * math.pi * central_freq / sample_rate
+    cos_w0 = math.cos(w0)
+    alpha = math.sin(w0) / 2 / Q
+    # Linear amplitude from the dB gain: exp(gain / 40 * ln 10).
+    A = math.exp(gain / 40 * math.log(10))
+    # Terms shared between the numerator and denominator coefficients.
+    sqrt_term = 2 * math.sqrt(A) * alpha
+    minus_term = (A - 1) * cos_w0
+    plus_term = (A + 1) * cos_w0
+    # Denominator (feedback) coefficients.
+    a0 = (A + 1) - minus_term + sqrt_term
+    a1 = 2 * ((A - 1) - plus_term)
+    a2 = (A + 1) - minus_term - sqrt_term
+    # Numerator (feed-forward) coefficients.
+    b0 = A * ((A + 1) + minus_term + sqrt_term)
+    b1 = -2 * A * ((A - 1) + plus_term)
+    b2 = A * ((A + 1) + minus_term - sqrt_term)
+    return biquad(waveform, b0, b1, b2, a0, a1, a2)
+def deemph_biquad(waveform, sample_rate):
+    # type: (Tensor, int) -> Tensor
+    r"""Apply ISO 908 CD de-emphasis (shelving) IIR filter.  Similar to SoX implementation.
+    Args:
+        waveform (torch.Tensor): audio waveform of dimension of `(..., time)`
+        sample_rate (int): sampling rate of the waveform, Allowed sample rate ``44100`` or ``48000``
+    Returns:
+        output_waveform (torch.Tensor): Dimension of `(..., time)`
+    Raises:
+        ValueError: if ``sample_rate`` is neither ``44100`` nor ``48000``
+    References:
+        http://sox.sourceforge.net/sox.html
+        https://www.w3.org/2011/audio/audio-eq-cookbook.html#APF
+    """
+    # Filter parameters are fixed per supported sample rate.
+    if sample_rate == 48000:
+        central_freq = 5356
+        width_slope = 0.479
+        gain = -9.62
+    elif sample_rate == 44100:
+        central_freq = 5283
+        width_slope = 0.4845
+        gain = -9.477
+    else:
+        raise ValueError("Sample rate must be 44100 (audio-CD) or 48000 (DAT)")
+    w0 = 2 * math.pi * central_freq / sample_rate
+    cos_w0 = math.cos(w0)
+    # Linear amplitude from the dB gain: exp(gain / 40 * ln 10).
+    A = math.exp(gain / 40.0 * math.log(10))
+    # alpha is derived from the slope parameter here, not from a Q value.
+    alpha = math.sin(w0) / 2 * math.sqrt((A + 1 / A) * (1 / width_slope - 1) + 2)
+    # Terms shared between the numerator and denominator coefficients.
+    sqrt_term = 2 * math.sqrt(A) * alpha
+    minus_term = (A - 1) * cos_w0
+    plus_term = (A + 1) * cos_w0
+    # Denominator (feedback) coefficients.
+    a0 = (A + 1) - minus_term + sqrt_term
+    a1 = 2 * ((A - 1) - plus_term)
+    a2 = (A + 1) - minus_term - sqrt_term
+    # Numerator (feed-forward) coefficients.
+    b0 = A * ((A + 1) + minus_term + sqrt_term)
+    b1 = -2 * A * ((A - 1) + plus_term)
+    b2 = A * ((A + 1) + minus_term - sqrt_term)
+    return biquad(waveform, b0, b1, b2, a0, a1, a2)
+def riaa_biquad(waveform, sample_rate):
+    # type: (Tensor, int) -> Tensor
+    r"""Apply RIAA vinyl playback equalisation.  Similar to SoX implementation.
+    Args:
+        waveform (torch.Tensor): audio waveform of dimension of `(..., time)`
+        sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz).
+            Allowed sample rates in Hz : ``44100``,``48000``,``88200``,``96000``
+    Returns:
+        output_waveform (torch.Tensor): Dimension of `(..., time)`
+    Raises:
+        ValueError: if ``sample_rate`` is not one of the allowed values
+    References:
+        http://sox.sourceforge.net/sox.html
+        https://www.w3.org/2011/audio/audio-eq-cookbook.html#APF
+    """
+    # Tabulated zero/pole pair for each supported sample rate.
+    if sample_rate == 44100:
+        zero_a = -0.2014898
+        zero_b = 0.9233820
+        pole_a = 0.7083149
+        pole_b = 0.9924091
+    elif sample_rate == 48000:
+        zero_a = -0.1766069
+        zero_b = 0.9321590
+        pole_a = 0.7396325
+        pole_b = 0.9931330
+    elif sample_rate == 88200:
+        zero_a = -0.1168735
+        zero_b = 0.9648312
+        pole_a = 0.8590646
+        pole_b = 0.9964002
+    elif sample_rate == 96000:
+        zero_a = -0.1141486
+        zero_b = 0.9676817
+        pole_a = 0.8699137
+        pole_b = 0.9966946
+    else:
+        raise ValueError("Sample rate must be 44.1k, 48k, 88.2k, or 96k")
+    # Numerator polynomial whose roots are the two zeros.
+    b0 = 1.
+    b1 = -(zero_a + zero_b)
+    b2 = (zero_a * zero_b)
+    # Denominator polynomial whose roots are the two poles.
+    a0 = 1.
+    a1 = -(pole_a + pole_b)
+    a2 = (pole_a * pole_b)
+    # Normalise to 0dB at 1kHz: evaluate numerator and denominator at the
+    # 1 kHz angular frequency and scale the numerator by the inverse magnitude ratio.
+    y = 2 * math.pi * 1000 / sample_rate
+    cos_1 = math.cos(-y)
+    cos_2 = math.cos(-2 * y)
+    sin_1 = math.sin(-y)
+    sin_2 = math.sin(-2 * y)
+    b_re = b0 + b1 * cos_1 + b2 * cos_2
+    a_re = a0 + a1 * cos_1 + a2 * cos_2
+    b_im = b1 * sin_1 + b2 * sin_2
+    a_im = a1 * sin_1 + a2 * sin_2
+    g = 1 / math.sqrt((b_re**2 + b_im**2) / (a_re**2 + a_im**2))
+    return biquad(waveform, b0 * g, b1 * g, b2 * g, a0, a1, a2)
 def mask_along_axis_iid(specgrams, mask_param, mask_value, axis):
    # type: (Tensor, int, float, int) -> Tensor
    r"""