Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
df9a0417
Unverified
Commit
df9a0417
authored
Mar 24, 2020
by
Bhargav Kathivarapu
Committed by
GitHub
Mar 24, 2020
Browse files
Add band,treble,deemph,riaa to functional.py (#470)
parent
11fb22aa
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
283 additions
and
0 deletions
+283
-0
test/test_functional_filtering.py
test/test_functional_filtering.py
+97
-0
torchaudio/functional.py
torchaudio/functional.py
+186
-0
No files found.
test/test_functional_filtering.py
View file @
df9a0417
...
@@ -230,6 +230,103 @@ class TestFunctionalFiltering(unittest.TestCase):
...
@@ -230,6 +230,103 @@ class TestFunctionalFiltering(unittest.TestCase):
assert
torch
.
allclose
(
sox_output_waveform
,
output_waveform
,
atol
=
1e-4
)
assert
torch
.
allclose
(
sox_output_waveform
,
output_waveform
,
atol
=
1e-4
)
_test_torchscript_functional
(
F
.
bandreject_biquad
,
waveform
,
sample_rate
,
CENTRAL_FREQ
,
Q
)
_test_torchscript_functional
(
F
.
bandreject_biquad
,
waveform
,
sample_rate
,
CENTRAL_FREQ
,
Q
)
def test_band_with_noise(self):
    """
    Compare F.band_biquad in noise mode against the SoX "band -n" effect.
    """
    center_hz = 1000
    q_value = 0.707
    noise_mode = True

    asset_path = os.path.join(self.test_dirpath, "assets", "whitenoise.mp3")

    # Reference signal: run the asset through the SoX effects chain.
    chain = torchaudio.sox_effects.SoxEffectsChain()
    chain.set_input_file(asset_path)
    chain.append_effect_to_chain("band", ["-n", center_hz, str(q_value) + 'q'])
    expected, _ = chain.sox_build_flow_effects()

    # Candidate signal: same asset through the functional implementation.
    waveform, sample_rate = torchaudio.load(asset_path, normalization=True)
    actual = F.band_biquad(waveform, sample_rate, center_hz, q_value, noise_mode)

    assert torch.allclose(expected, actual, atol=1e-4)
    _test_torchscript_functional(
        F.band_biquad, waveform, sample_rate, center_hz, q_value, noise_mode
    )
def test_band_without_noise(self):
    """
    Compare F.band_biquad with noise mode off against the SoX "band" effect.
    """
    center_hz = 1000
    q_value = 0.707
    noise_mode = False

    asset_path = os.path.join(self.test_dirpath, "assets", "whitenoise.mp3")

    # Reference signal: run the asset through the SoX effects chain.
    chain = torchaudio.sox_effects.SoxEffectsChain()
    chain.set_input_file(asset_path)
    chain.append_effect_to_chain("band", [center_hz, str(q_value) + 'q'])
    expected, _ = chain.sox_build_flow_effects()

    # Candidate signal: same asset through the functional implementation.
    waveform, sample_rate = torchaudio.load(asset_path, normalization=True)
    actual = F.band_biquad(waveform, sample_rate, center_hz, q_value, noise_mode)

    assert torch.allclose(expected, actual, atol=1e-4)
    _test_torchscript_functional(
        F.band_biquad, waveform, sample_rate, center_hz, q_value, noise_mode
    )
def test_treble(self):
    """
    Compare F.treble_biquad against the SoX "treble" effect.
    """
    center_hz = 1000
    q_value = 0.707
    gain_db = 40

    asset_path = os.path.join(self.test_dirpath, "assets", "whitenoise.mp3")

    # Reference signal: run the asset through the SoX effects chain.
    chain = torchaudio.sox_effects.SoxEffectsChain()
    chain.set_input_file(asset_path)
    chain.append_effect_to_chain("treble", [gain_db, center_hz, str(q_value) + 'q'])
    expected, _ = chain.sox_build_flow_effects()

    # Candidate signal: same asset through the functional implementation.
    waveform, sample_rate = torchaudio.load(asset_path, normalization=True)
    actual = F.treble_biquad(waveform, sample_rate, gain_db, center_hz, q_value)

    assert torch.allclose(expected, actual, atol=1e-4)
    _test_torchscript_functional(
        F.treble_biquad, waveform, sample_rate, gain_db, center_hz, q_value
    )
def test_deemph(self):
    """
    Compare F.deemph_biquad against the SoX "deemph" effect.
    """
    asset_path = os.path.join(self.test_dirpath, "assets", "whitenoise.mp3")

    # Reference signal: run the asset through the SoX effects chain.
    chain = torchaudio.sox_effects.SoxEffectsChain()
    chain.set_input_file(asset_path)
    chain.append_effect_to_chain("deemph")
    expected, _ = chain.sox_build_flow_effects()

    # Candidate signal: same asset through the functional implementation.
    waveform, sample_rate = torchaudio.load(asset_path, normalization=True)
    actual = F.deemph_biquad(waveform, sample_rate)

    assert torch.allclose(expected, actual, atol=1e-4)
    _test_torchscript_functional(F.deemph_biquad, waveform, sample_rate)
def test_riaa(self):
    """
    Compare F.riaa_biquad against the SoX "riaa" effect.
    """
    asset_path = os.path.join(self.test_dirpath, "assets", "whitenoise.mp3")

    # Reference signal: run the asset through the SoX effects chain.
    chain = torchaudio.sox_effects.SoxEffectsChain()
    chain.set_input_file(asset_path)
    chain.append_effect_to_chain("riaa")
    expected, _ = chain.sox_build_flow_effects()

    # Candidate signal: same asset through the functional implementation.
    waveform, sample_rate = torchaudio.load(asset_path, normalization=True)
    actual = F.riaa_biquad(waveform, sample_rate)

    assert torch.allclose(expected, actual, atol=1e-4)
    _test_torchscript_functional(F.riaa_biquad, waveform, sample_rate)
def
test_equalizer
(
self
):
def
test_equalizer
(
self
):
"""
"""
Test biquad peaking equalizer filter, compare to SoX implementation
Test biquad peaking equalizer filter, compare to SoX implementation
...
...
torchaudio/functional.py
View file @
df9a0417
...
@@ -26,6 +26,10 @@ __all__ = [
...
@@ -26,6 +26,10 @@ __all__ = [
"bandpass_biquad"
,
"bandpass_biquad"
,
"bandreject_biquad"
,
"bandreject_biquad"
,
"equalizer_biquad"
,
"equalizer_biquad"
,
"band_biquad"
,
"treble_biquad"
,
"deemph_biquad"
,
"riaa_biquad"
,
"biquad"
,
"biquad"
,
'mask_along_axis'
,
'mask_along_axis'
,
'mask_along_axis_iid'
'mask_along_axis_iid'
...
@@ -912,6 +916,188 @@ def equalizer_biquad(waveform, sample_rate, center_freq, gain, Q=0.707):
...
@@ -912,6 +916,188 @@ def equalizer_biquad(waveform, sample_rate, center_freq, gain, Q=0.707):
return
biquad
(
waveform
,
b0
,
b1
,
b2
,
a0
,
a1
,
a2
)
return
biquad
(
waveform
,
b0
,
b1
,
b2
,
a0
,
a1
,
a2
)
def band_biquad(waveform, sample_rate, central_freq, Q=0.707, noise=False):
    # type: (Tensor, int, float, float, bool) -> Tensor
    r"""Design two-pole band filter.  Similar to SoX implementation.

    Args:
        waveform (torch.Tensor): audio waveform of dimension of `(..., time)`
        sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz)
        central_freq (float): central frequency (in Hz)
        Q (float): https://en.wikipedia.org/wiki/Q_factor (Default: ``0.707``)
        noise (bool): If ``True``, uses the alternate mode for un-pitched audio
            (e.g. percussion). If ``False``, uses mode oriented to pitched audio,
            i.e. voice, singing, or instrumental music. (Default: ``False``)

    Returns:
        output_waveform (torch.Tensor): Dimension of `(..., time)`

    References:
        http://sox.sourceforge.net/sox.html
        https://www.w3.org/2011/audio/audio-eq-cookbook.html#APF
    """
    # Normalised angular centre frequency (radians per sample).
    w0 = 2 * math.pi * central_freq / sample_rate
    # Bandwidth in Hz implied by the requested Q.
    bw_Hz = central_freq / Q

    # Denominator (feedback) coefficients.
    a0 = 1.
    a2 = math.exp(-2 * math.pi * bw_Hz / sample_rate)
    a1 = -4 * a2 / (1 + a2) * math.cos(w0)

    # Numerator gain for the default (pitched-audio) mode.
    b0 = math.sqrt(1 - a1 * a1 / (4 * a2)) * (1 - a2)

    if noise:
        # Noise mode rescales b0 by an extra factor that depends only on a1 and a2.
        mult = math.sqrt(((1 + a2) * (1 + a2) - a1 * a1) * (1 - a2) / (1 + a2)) / b0
        b0 *= mult

    # Only b0 is non-zero in the numerator.
    b1 = 0.
    b2 = 0.

    return biquad(waveform, b0, b1, b2, a0, a1, a2)
def treble_biquad(waveform, sample_rate, gain, central_freq=3000, Q=0.707):
    # type: (Tensor, int, float, float, float) -> Tensor
    r"""Design a treble tone-control effect.  Similar to SoX implementation.

    Args:
        waveform (torch.Tensor): audio waveform of dimension of `(..., time)`
        sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz)
        gain (float): desired gain at the boost (or attenuation) in dB.
        central_freq (float): central frequency (in Hz). (Default: ``3000``)
        Q (float): https://en.wikipedia.org/wiki/Q_factor (Default: ``0.707``)

    Returns:
        output_waveform (torch.Tensor): Dimension of `(..., time)`

    References:
        http://sox.sourceforge.net/sox.html
        https://www.w3.org/2011/audio/audio-eq-cookbook.html#APF
    """
    # Normalised angular frequency and its cosine (used by every coefficient).
    omega = 2 * math.pi * central_freq / sample_rate
    cos_omega = math.cos(omega)
    alpha = math.sin(omega) / 2 / Q

    # Linear amplitude factor, i.e. 10 ** (gain / 40) written via exp/log.
    amp = math.exp(gain / 40 * math.log(10))

    # Shared sub-expressions of the six coefficients.
    amp_plus = amp + 1
    amp_minus = amp - 1
    width_term = 2 * math.sqrt(amp) * alpha
    minus_cos = amp_minus * cos_omega
    plus_cos = amp_plus * cos_omega

    # Numerator.
    b0 = amp * (amp_plus + minus_cos + width_term)
    b1 = -2 * amp * (amp_minus + plus_cos)
    b2 = amp * (amp_plus + minus_cos - width_term)

    # Denominator.
    a0 = amp_plus - minus_cos + width_term
    a1 = 2 * (amp_minus - plus_cos)
    a2 = amp_plus - minus_cos - width_term

    return biquad(waveform, b0, b1, b2, a0, a1, a2)
def deemph_biquad(waveform, sample_rate):
    # type: (Tensor, int) -> Tensor
    r"""Apply ISO 908 CD de-emphasis (shelving) IIR filter.  Similar to SoX implementation.

    Args:
        waveform (torch.Tensor): audio waveform of dimension of `(..., time)`
        sample_rate (int): sampling rate of the waveform. Only ``44100`` and
            ``48000`` are accepted.

    Returns:
        output_waveform (torch.Tensor): Dimension of `(..., time)`

    Raises:
        ValueError: if ``sample_rate`` is neither 44100 nor 48000.

    References:
        http://sox.sourceforge.net/sox.html
        https://www.w3.org/2011/audio/audio-eq-cookbook.html#APF
    """
    # Reject unsupported rates up front; only two parameter sets are defined.
    if sample_rate != 44100 and sample_rate != 48000:
        raise ValueError("Sample rate must be 44100 (audio-CD) or 48000 (DAT)")

    if sample_rate == 44100:
        central_freq = 5283
        width_slope = 0.4845
        gain = -9.477
    else:
        central_freq = 5356
        width_slope = 0.479
        gain = -9.62

    omega = 2 * math.pi * central_freq / sample_rate
    cos_omega = math.cos(omega)

    # Linear amplitude factor, i.e. 10 ** (gain / 40) written via exp/log.
    amp = math.exp(gain / 40.0 * math.log(10))
    # Here alpha is derived from the slope parameter rather than from a Q value.
    alpha = math.sin(omega) / 2 * math.sqrt((amp + 1 / amp) * (1 / width_slope - 1) + 2)

    # Shared sub-expressions of the six coefficients.
    amp_plus = amp + 1
    amp_minus = amp - 1
    width_term = 2 * math.sqrt(amp) * alpha
    minus_cos = amp_minus * cos_omega
    plus_cos = amp_plus * cos_omega

    # Numerator.
    b0 = amp * (amp_plus + minus_cos + width_term)
    b1 = -2 * amp * (amp_minus + plus_cos)
    b2 = amp * (amp_plus + minus_cos - width_term)

    # Denominator.
    a0 = amp_plus - minus_cos + width_term
    a1 = 2 * (amp_minus - plus_cos)
    a2 = amp_plus - minus_cos - width_term

    return biquad(waveform, b0, b1, b2, a0, a1, a2)
def riaa_biquad(waveform, sample_rate):
    # type: (Tensor, int) -> Tensor
    r"""Apply RIAA vinyl playback equalisation.  Similar to SoX implementation.

    Args:
        waveform (torch.Tensor): audio waveform of dimension of `(..., time)`
        sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz).
            Allowed sample rates in Hz : ``44100``,``48000``,``88200``,``96000``

    Returns:
        output_waveform (torch.Tensor): Dimension of `(..., time)`

    Raises:
        ValueError: if ``sample_rate`` is not one of the allowed rates.

    References:
        http://sox.sourceforge.net/sox.html
        https://www.w3.org/2011/audio/audio-eq-cookbook.html#APF
    """
    # Per-rate filter roots: two zeros and two poles.
    if sample_rate == 44100:
        zero_a = -0.2014898
        zero_b = 0.9233820
        pole_a = 0.7083149
        pole_b = 0.9924091
    elif sample_rate == 48000:
        zero_a = -0.1766069
        zero_b = 0.9321590
        pole_a = 0.7396325
        pole_b = 0.9931330
    elif sample_rate == 88200:
        zero_a = -0.1168735
        zero_b = 0.9648312
        pole_a = 0.8590646
        pole_b = 0.9964002
    elif sample_rate == 96000:
        zero_a = -0.1141486
        zero_b = 0.9676817
        pole_a = 0.8699137
        pole_b = 0.9966946
    else:
        raise ValueError("Sample rate must be 44.1k, 48k, 88.2k, or 96k")

    # Expand (x - zero_a)(x - zero_b) into polynomial coefficients.
    b0 = 1.
    b1 = -(zero_a + zero_b)
    b2 = zero_a * zero_b

    # Expand (x - pole_a)(x - pole_b) into polynomial coefficients.
    a0 = 1.
    a1 = -(pole_a + pole_b)
    a2 = pole_a * pole_b

    # Normalise to 0dB at 1kHz: evaluate numerator and denominator at the
    # 1 kHz angle and scale the numerator by the inverse magnitude ratio.
    y = 2 * math.pi * 1000 / sample_rate
    cos_1 = math.cos(-y)
    cos_2 = math.cos(-2 * y)
    sin_1 = math.sin(-y)
    sin_2 = math.sin(-2 * y)

    b_re = b0 + b1 * cos_1 + b2 * cos_2
    a_re = a0 + a1 * cos_1 + a2 * cos_2
    b_im = b1 * sin_1 + b2 * sin_2
    a_im = a1 * sin_1 + a2 * sin_2

    g = 1 / math.sqrt((b_re ** 2 + b_im ** 2) / (a_re ** 2 + a_im ** 2))

    b0 = b0 * g
    b1 = b1 * g
    b2 = b2 * g

    return biquad(waveform, b0, b1, b2, a0, a1, a2)
def
mask_along_axis_iid
(
specgrams
,
mask_param
,
mask_value
,
axis
):
def
mask_along_axis_iid
(
specgrams
,
mask_param
,
mask_value
,
axis
):
# type: (Tensor, int, float, int) -> Tensor
# type: (Tensor, int, float, int) -> Tensor
r
"""
r
"""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment