"git@developer.sourcefind.cn:OpenDAS/torchaudio.git" did not exist on "1a18c41dc42438fed6948804c72903c8c441215a"
Unverified commit df9a0417, authored by Bhargav Kathivarapu and committed by GitHub
Browse files

Add band,treble,deemph,riaa to functional.py (#470)

parent 11fb22aa
...@@ -230,6 +230,103 @@ class TestFunctionalFiltering(unittest.TestCase): ...@@ -230,6 +230,103 @@ class TestFunctionalFiltering(unittest.TestCase):
assert torch.allclose(sox_output_waveform, output_waveform, atol=1e-4) assert torch.allclose(sox_output_waveform, output_waveform, atol=1e-4)
_test_torchscript_functional(F.bandreject_biquad, waveform, sample_rate, CENTRAL_FREQ, Q) _test_torchscript_functional(F.bandreject_biquad, waveform, sample_rate, CENTRAL_FREQ, Q)
def test_band_with_noise(self):
    """
    Check ``F.band_biquad`` with noise mode enabled against the SoX ``band -n`` effect
    """
    central_freq = 1000
    q_factor = 0.707
    noise_mode = True
    audio_path = os.path.join(self.test_dirpath, "assets", "whitenoise.mp3")

    # Reference result: push the asset through the SoX "band" effect.
    sox_chain = torchaudio.sox_effects.SoxEffectsChain()
    sox_chain.set_input_file(audio_path)
    band_args = ["-n", central_freq, str(q_factor) + 'q']
    sox_chain.append_effect_to_chain("band", band_args)
    expected, _ = sox_chain.sox_build_flow_effects()

    # Result under test: the same asset through the functional implementation.
    waveform, sample_rate = torchaudio.load(audio_path, normalization=True)
    actual = F.band_biquad(waveform, sample_rate, central_freq, q_factor, noise_mode)

    assert torch.allclose(expected, actual, atol=1e-4)
    _test_torchscript_functional(
        F.band_biquad, waveform, sample_rate, central_freq, q_factor, noise_mode
    )
def test_band_without_noise(self):
    """
    Check ``F.band_biquad`` with noise mode disabled against the SoX ``band`` effect
    """
    central_freq = 1000
    q_factor = 0.707
    noise_mode = False
    audio_path = os.path.join(self.test_dirpath, "assets", "whitenoise.mp3")

    # Reference result: push the asset through the SoX "band" effect (no "-n" flag).
    sox_chain = torchaudio.sox_effects.SoxEffectsChain()
    sox_chain.set_input_file(audio_path)
    band_args = [central_freq, str(q_factor) + 'q']
    sox_chain.append_effect_to_chain("band", band_args)
    expected, _ = sox_chain.sox_build_flow_effects()

    # Result under test: the same asset through the functional implementation.
    waveform, sample_rate = torchaudio.load(audio_path, normalization=True)
    actual = F.band_biquad(waveform, sample_rate, central_freq, q_factor, noise_mode)

    assert torch.allclose(expected, actual, atol=1e-4)
    _test_torchscript_functional(
        F.band_biquad, waveform, sample_rate, central_freq, q_factor, noise_mode
    )
def test_treble(self):
    """
    Check ``F.treble_biquad`` against the SoX ``treble`` effect
    """
    central_freq = 1000
    q_factor = 0.707
    gain = 40
    audio_path = os.path.join(self.test_dirpath, "assets", "whitenoise.mp3")

    # Reference result: push the asset through the SoX "treble" effect.
    sox_chain = torchaudio.sox_effects.SoxEffectsChain()
    sox_chain.set_input_file(audio_path)
    treble_args = [gain, central_freq, str(q_factor) + 'q']
    sox_chain.append_effect_to_chain("treble", treble_args)
    expected, _ = sox_chain.sox_build_flow_effects()

    # Result under test: the same asset through the functional implementation.
    waveform, sample_rate = torchaudio.load(audio_path, normalization=True)
    actual = F.treble_biquad(waveform, sample_rate, gain, central_freq, q_factor)

    assert torch.allclose(expected, actual, atol=1e-4)
    _test_torchscript_functional(
        F.treble_biquad, waveform, sample_rate, gain, central_freq, q_factor
    )
def test_deemph(self):
    """
    Check ``F.deemph_biquad`` against the SoX ``deemph`` effect
    """
    audio_path = os.path.join(self.test_dirpath, "assets", "whitenoise.mp3")

    # Reference result: push the asset through the SoX "deemph" effect.
    sox_chain = torchaudio.sox_effects.SoxEffectsChain()
    sox_chain.set_input_file(audio_path)
    sox_chain.append_effect_to_chain("deemph")
    expected, _ = sox_chain.sox_build_flow_effects()

    # Result under test: the same asset through the functional implementation.
    waveform, sample_rate = torchaudio.load(audio_path, normalization=True)
    actual = F.deemph_biquad(waveform, sample_rate)

    assert torch.allclose(expected, actual, atol=1e-4)
    _test_torchscript_functional(F.deemph_biquad, waveform, sample_rate)
def test_riaa(self):
    """
    Check ``F.riaa_biquad`` against the SoX ``riaa`` effect
    """
    audio_path = os.path.join(self.test_dirpath, "assets", "whitenoise.mp3")

    # Reference result: push the asset through the SoX "riaa" effect.
    sox_chain = torchaudio.sox_effects.SoxEffectsChain()
    sox_chain.set_input_file(audio_path)
    sox_chain.append_effect_to_chain("riaa")
    expected, _ = sox_chain.sox_build_flow_effects()

    # Result under test: the same asset through the functional implementation.
    waveform, sample_rate = torchaudio.load(audio_path, normalization=True)
    actual = F.riaa_biquad(waveform, sample_rate)

    assert torch.allclose(expected, actual, atol=1e-4)
    _test_torchscript_functional(F.riaa_biquad, waveform, sample_rate)
def test_equalizer(self): def test_equalizer(self):
""" """
Test biquad peaking equalizer filter, compare to SoX implementation Test biquad peaking equalizer filter, compare to SoX implementation
......
...@@ -26,6 +26,10 @@ __all__ = [ ...@@ -26,6 +26,10 @@ __all__ = [
"bandpass_biquad", "bandpass_biquad",
"bandreject_biquad", "bandreject_biquad",
"equalizer_biquad", "equalizer_biquad",
"band_biquad",
"treble_biquad",
"deemph_biquad",
"riaa_biquad",
"biquad", "biquad",
'mask_along_axis', 'mask_along_axis',
'mask_along_axis_iid' 'mask_along_axis_iid'
...@@ -912,6 +916,188 @@ def equalizer_biquad(waveform, sample_rate, center_freq, gain, Q=0.707): ...@@ -912,6 +916,188 @@ def equalizer_biquad(waveform, sample_rate, center_freq, gain, Q=0.707):
return biquad(waveform, b0, b1, b2, a0, a1, a2) return biquad(waveform, b0, b1, b2, a0, a1, a2)
def band_biquad(waveform, sample_rate, central_freq, Q=0.707, noise=False):
    # type: (Tensor, int, float, float, bool) -> Tensor
    r"""Design two-pole band filter.  Similar to SoX implementation.

    Args:
        waveform (torch.Tensor): audio waveform of dimension of `(..., time)`
        sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz)
        central_freq (float): central frequency (in Hz)
        Q (float): https://en.wikipedia.org/wiki/Q_factor (Default: ``0.707``)
        noise (bool): If ``True``, uses the alternate mode for un-pitched audio (e.g. percussion).
            If ``False``, uses mode oriented to pitched audio, i.e. voice, singing,
            or instrumental music. (Default: ``False``)

    Returns:
        output_waveform (torch.Tensor): Dimension of `(..., time)`

    References:
        http://sox.sourceforge.net/sox.html
        https://www.w3.org/2011/audio/audio-eq-cookbook.html#APF
    """
    # Normalised angular frequency of the band centre.
    w0 = 2 * math.pi * central_freq / sample_rate
    # Bandwidth in Hz implied by the centre frequency and Q.
    bw_Hz = central_freq / Q

    # Denominator (feedback) coefficients.
    a0 = 1.
    a2 = math.exp(-2 * math.pi * bw_Hz / sample_rate)
    a1 = -4 * a2 / (1 + a2) * math.cos(w0)

    # Gain used by the default (pitched-audio) mode.
    b0 = math.sqrt(1 - a1 * a1 / (4 * a2)) * (1 - a2)

    if noise:
        # Noise mode swaps in an alternate gain: ``mult`` is that gain divided
        # by the current ``b0``, so the product below replaces ``b0`` with it.
        mult = math.sqrt(((1 + a2) * (1 + a2) - a1 * a1) * (1 - a2) / (1 + a2)) / b0
        b0 *= mult

    # The numerator has a single non-zero tap.
    b1 = 0.
    b2 = 0.

    return biquad(waveform, b0, b1, b2, a0, a1, a2)
def treble_biquad(waveform, sample_rate, gain, central_freq=3000, Q=0.707):
    # type: (Tensor, int, float, float, float) -> Tensor
    r"""Apply a treble tone-control biquad.  Similar to SoX implementation.

    Args:
        waveform (torch.Tensor): audio waveform of dimension of `(..., time)`
        sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz)
        gain (float): desired gain at the boost (or attenuation) in dB.
        central_freq (float): central frequency (in Hz). (Default: ``3000``)
        Q (float): https://en.wikipedia.org/wiki/Q_factor (Default: ``0.707``)

    Returns:
        output_waveform (torch.Tensor): Dimension of `(..., time)`

    References:
        http://sox.sourceforge.net/sox.html
        https://www.w3.org/2011/audio/audio-eq-cookbook.html#APF
    """
    omega = 2 * math.pi * central_freq / sample_rate
    cos_omega = math.cos(omega)
    alpha = math.sin(omega) / 2 / Q
    # Linear amplitude factor: 10 ** (gain / 40).
    amp = math.exp(gain / 40 * math.log(10))

    # Terms shared by the numerator and denominator coefficients.
    slope_term = 2 * math.sqrt(amp) * alpha
    minus_term = (amp - 1) * cos_omega
    plus_term = (amp + 1) * cos_omega

    # Numerator (feed-forward) coefficients.
    b0 = amp * ((amp + 1) + minus_term + slope_term)
    b1 = -2 * amp * ((amp - 1) + plus_term)
    b2 = amp * ((amp + 1) + minus_term - slope_term)

    # Denominator (feedback) coefficients.
    a0 = (amp + 1) - minus_term + slope_term
    a1 = 2 * ((amp - 1) - plus_term)
    a2 = (amp + 1) - minus_term - slope_term

    return biquad(waveform, b0, b1, b2, a0, a1, a2)
def deemph_biquad(waveform, sample_rate):
    # type: (Tensor, int) -> Tensor
    r"""Apply ISO 908 CD de-emphasis (shelving) IIR filter.  Similar to SoX implementation.

    Args:
        waveform (torch.Tensor): audio waveform of dimension of `(..., time)`
        sample_rate (int): sampling rate of the waveform; only ``44100`` and ``48000`` are accepted

    Returns:
        output_waveform (torch.Tensor): Dimension of `(..., time)`

    Raises:
        ValueError: if ``sample_rate`` is neither ``44100`` nor ``48000``

    References:
        http://sox.sourceforge.net/sox.html
        https://www.w3.org/2011/audio/audio-eq-cookbook.html#APF
    """
    # Reject unsupported rates up front; the constants below exist only for these two.
    if sample_rate != 44100 and sample_rate != 48000:
        raise ValueError("Sample rate must be 44100 (audio-CD) or 48000 (DAT)")

    if sample_rate == 44100:
        central_freq = 5283
        width_slope = 0.4845
        gain = -9.477
    else:
        central_freq = 5356
        width_slope = 0.479
        gain = -9.62

    omega = 2 * math.pi * central_freq / sample_rate
    cos_omega = math.cos(omega)
    # Linear amplitude factor: 10 ** (gain / 40).
    amp = math.exp(gain / 40.0 * math.log(10))
    alpha = math.sin(omega) / 2 * math.sqrt((amp + 1 / amp) * (1 / width_slope - 1) + 2)

    # Terms shared by the numerator and denominator coefficients.
    slope_term = 2 * math.sqrt(amp) * alpha
    minus_term = (amp - 1) * cos_omega
    plus_term = (amp + 1) * cos_omega

    # Numerator (feed-forward) coefficients.
    b0 = amp * ((amp + 1) + minus_term + slope_term)
    b1 = -2 * amp * ((amp - 1) + plus_term)
    b2 = amp * ((amp + 1) + minus_term - slope_term)

    # Denominator (feedback) coefficients.
    a0 = (amp + 1) - minus_term + slope_term
    a1 = 2 * ((amp - 1) - plus_term)
    a2 = (amp + 1) - minus_term - slope_term

    return biquad(waveform, b0, b1, b2, a0, a1, a2)
def riaa_biquad(waveform, sample_rate):
    # type: (Tensor, int) -> Tensor
    r"""Apply RIAA vinyl playback equalisation.  Similar to SoX implementation.

    Args:
        waveform (torch.Tensor): audio waveform of dimension of `(..., time)`
        sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz).
            Allowed sample rates in Hz : ``44100``,``48000``,``88200``,``96000``

    Returns:
        output_waveform (torch.Tensor): Dimension of `(..., time)`

    Raises:
        ValueError: if ``sample_rate`` is not one of the allowed rates

    References:
        http://sox.sourceforge.net/sox.html
        https://www.w3.org/2011/audio/audio-eq-cookbook.html#APF
    """
    # Per-rate filter roots: two zeros and two poles.
    if sample_rate == 44100:
        zero_first = -0.2014898
        zero_second = 0.9233820
        pole_first = 0.7083149
        pole_second = 0.9924091
    elif sample_rate == 48000:
        zero_first = -0.1766069
        zero_second = 0.9321590
        pole_first = 0.7396325
        pole_second = 0.9931330
    elif sample_rate == 88200:
        zero_first = -0.1168735
        zero_second = 0.9648312
        pole_first = 0.8590646
        pole_second = 0.9964002
    elif sample_rate == 96000:
        zero_first = -0.1141486
        zero_second = 0.9676817
        pole_first = 0.8699137
        pole_second = 0.9966946
    else:
        raise ValueError("Sample rate must be 44.1k, 48k, 88.2k, or 96k")

    # Expand (1 - z0 x)(1 - z1 x) into numerator polynomial coefficients.
    b0 = 1.
    b1 = -(zero_first + zero_second)
    b2 = (zero_first * zero_second)

    # Expand (1 - p0 x)(1 - p1 x) into denominator polynomial coefficients.
    a0 = 1.
    a1 = -(pole_first + pole_second)
    a2 = (pole_first * pole_second)

    # Normalise to 0dB at 1kHz: evaluate both polynomials at that frequency
    # and scale the numerator by the inverse of the magnitude ratio.
    theta = 2 * math.pi * 1000 / sample_rate
    cos_one = math.cos(-theta)
    cos_two = math.cos(-2 * theta)
    sin_one = math.sin(-theta)
    sin_two = math.sin(-2 * theta)

    num_re = b0 + b1 * cos_one + b2 * cos_two
    den_re = a0 + a1 * cos_one + a2 * cos_two
    num_im = b1 * sin_one + b2 * sin_two
    den_im = a1 * sin_one + a2 * sin_two
    scale = 1 / math.sqrt((num_re**2 + num_im**2) / (den_re**2 + den_im**2))

    b0 *= scale
    b1 *= scale
    b2 *= scale

    return biquad(waveform, b0, b1, b2, a0, a1, a2)
def mask_along_axis_iid(specgrams, mask_param, mask_value, axis): def mask_along_axis_iid(specgrams, mask_param, mask_value, axis):
# type: (Tensor, int, float, int) -> Tensor # type: (Tensor, int, float, int) -> Tensor
r""" r"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment