Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
d58ac213
Unverified
Commit
d58ac213
authored
Feb 15, 2021
by
George Harding
Committed by
GitHub
Feb 15, 2021
Browse files
Improve MelSpectrogram librosa compatibility test (#1267)
parent
9222e437
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
80 additions
and
64 deletions
+80
-64
test/torchaudio_unittest/librosa_compatibility_test.py
test/torchaudio_unittest/librosa_compatibility_test.py
+80
-64
No files found.
test/torchaudio_unittest/librosa_compatibility_test.py
View file @
d58ac213
...
@@ -7,7 +7,7 @@ import torch
...
@@ -7,7 +7,7 @@ import torch
import
torchaudio
import
torchaudio
import
torchaudio.functional
as
F
import
torchaudio.functional
as
F
from
torchaudio._internal.module_utils
import
is_module_available
from
torchaudio._internal.module_utils
import
is_module_available
from
parameterized
import
parameterized
from
parameterized
import
parameterized
,
param
import
itertools
import
itertools
LIBROSA_AVAILABLE
=
is_module_available
(
'librosa'
)
LIBROSA_AVAILABLE
=
is_module_available
(
'librosa'
)
...
@@ -165,13 +165,17 @@ def _load_audio_asset(*asset_paths, **kwargs):
...
@@ -165,13 +165,17 @@ def _load_audio_asset(*asset_paths, **kwargs):
@
unittest
.
skipIf
(
not
LIBROSA_AVAILABLE
,
"Librosa not available"
)
@
unittest
.
skipIf
(
not
LIBROSA_AVAILABLE
,
"Librosa not available"
)
class
TestTransforms
(
common_utils
.
TorchaudioTestCase
):
class
TestTransforms
(
common_utils
.
TorchaudioTestCase
):
"""Test suite for functions in `transforms` module."""
"""Test suite for functions in `transforms` module."""
def
assert_compatibilities
(
self
,
n_fft
,
hop_length
,
power
,
n_mels
,
n_mfcc
,
sample_rate
):
common_utils
.
set_audio_backend
(
'default'
)
path
=
common_utils
.
get_asset_path
(
'sinewave.wav'
)
sound
,
sample_rate
=
common_utils
.
load_wav
(
path
)
sound_librosa
=
sound
.
cpu
().
numpy
().
squeeze
()
# (64000)
# test core spectrogram
@
parameterized
.
expand
([
param
(
n_fft
=
400
,
hop_length
=
200
,
power
=
2.0
),
param
(
n_fft
=
600
,
hop_length
=
100
,
power
=
2.0
),
param
(
n_fft
=
400
,
hop_length
=
200
,
power
=
3.0
),
param
(
n_fft
=
200
,
hop_length
=
50
,
power
=
2.0
),
])
def
test_spectrogram
(
self
,
n_fft
,
hop_length
,
power
):
sample_rate
=
16000
sound
=
common_utils
.
get_sinusoid
(
n_channels
=
1
,
sample_rate
=
sample_rate
)
sound_librosa
=
sound
.
cpu
().
numpy
().
squeeze
()
spect_transform
=
torchaudio
.
transforms
.
Spectrogram
(
spect_transform
=
torchaudio
.
transforms
.
Spectrogram
(
n_fft
=
n_fft
,
hop_length
=
hop_length
,
power
=
power
)
n_fft
=
n_fft
,
hop_length
=
hop_length
,
power
=
power
)
out_librosa
,
_
=
librosa
.
core
.
spectrum
.
_spectrogram
(
out_librosa
,
_
=
librosa
.
core
.
spectrum
.
_spectrogram
(
...
@@ -180,19 +184,58 @@ class TestTransforms(common_utils.TorchaudioTestCase):
...
@@ -180,19 +184,58 @@ class TestTransforms(common_utils.TorchaudioTestCase):
out_torch
=
spect_transform
(
sound
).
squeeze
().
cpu
()
out_torch
=
spect_transform
(
sound
).
squeeze
().
cpu
()
self
.
assertEqual
(
out_torch
,
torch
.
from_numpy
(
out_librosa
),
atol
=
1e-5
,
rtol
=
1e-5
)
self
.
assertEqual
(
out_torch
,
torch
.
from_numpy
(
out_librosa
),
atol
=
1e-5
,
rtol
=
1e-5
)
# test mel spectrogram
@
parameterized
.
expand
([
param
(
norm
=
norm
,
**
p
.
kwargs
)
for
p
in
[
param
(
n_fft
=
400
,
hop_length
=
200
,
n_mels
=
128
),
param
(
n_fft
=
600
,
hop_length
=
100
,
n_mels
=
128
),
param
(
n_fft
=
200
,
hop_length
=
50
,
n_mels
=
128
),
]
for
norm
in
[
None
,
'slaney'
]
])
def
test_mel_spectrogram
(
self
,
n_fft
,
hop_length
,
n_mels
,
norm
):
sample_rate
=
16000
sound
=
common_utils
.
get_sinusoid
(
n_channels
=
1
,
sample_rate
=
sample_rate
)
sound_librosa
=
sound
.
cpu
().
numpy
().
squeeze
()
melspect_transform
=
torchaudio
.
transforms
.
MelSpectrogram
(
melspect_transform
=
torchaudio
.
transforms
.
MelSpectrogram
(
sample_rate
=
sample_rate
,
window_fn
=
torch
.
hann_window
,
sample_rate
=
sample_rate
,
window_fn
=
torch
.
hann_window
,
hop_length
=
hop_length
,
n_mels
=
n_mels
,
n_fft
=
n_fft
)
hop_length
=
hop_length
,
n_mels
=
n_mels
,
n_fft
=
n_fft
,
norm
=
norm
)
librosa_mel
=
librosa
.
feature
.
melspectrogram
(
librosa_mel
=
librosa
.
feature
.
melspectrogram
(
y
=
sound_librosa
,
sr
=
sample_rate
,
n_fft
=
n_fft
,
y
=
sound_librosa
,
sr
=
sample_rate
,
n_fft
=
n_fft
,
hop_length
=
hop_length
,
n_mels
=
n_mels
,
htk
=
True
,
norm
=
None
)
hop_length
=
hop_length
,
n_mels
=
n_mels
,
htk
=
True
,
norm
=
norm
)
librosa_mel_tensor
=
torch
.
from_numpy
(
librosa_mel
)
librosa_mel_tensor
=
torch
.
from_numpy
(
librosa_mel
)
torch_mel
=
melspect_transform
(
sound
).
squeeze
().
cpu
()
torch_mel
=
melspect_transform
(
sound
).
squeeze
().
cpu
()
self
.
assertEqual
(
self
.
assertEqual
(
torch_mel
.
type
(
librosa_mel_tensor
.
dtype
),
librosa_mel_tensor
,
atol
=
5e-3
,
rtol
=
1e-5
)
torch_mel
.
type
(
librosa_mel_tensor
.
dtype
),
librosa_mel_tensor
,
atol
=
5e-3
,
rtol
=
1e-5
)
# test s2db
@
parameterized
.
expand
([
param
(
norm
=
norm
,
**
p
.
kwargs
)
for
p
in
[
param
(
n_fft
=
400
,
hop_length
=
200
,
power
=
2.0
,
n_mels
=
128
),
param
(
n_fft
=
600
,
hop_length
=
100
,
power
=
2.0
,
n_mels
=
128
),
param
(
n_fft
=
400
,
hop_length
=
200
,
power
=
3.0
,
n_mels
=
128
),
# NOTE: Test passes offline, but fails on TravisCI (and CircleCI), see #372.
param
(
n_fft
=
200
,
hop_length
=
50
,
power
=
2.0
,
n_mels
=
128
,
skip_ci
=
True
),
]
for
norm
in
[
None
,
'slaney'
]
])
def
test_s2db
(
self
,
n_fft
,
hop_length
,
power
,
n_mels
,
norm
,
skip_ci
=
False
):
if
skip_ci
and
'CI'
in
os
.
environ
:
self
.
skipTest
(
'Test is known to fail on CI'
)
sample_rate
=
16000
sound
=
common_utils
.
get_sinusoid
(
n_channels
=
1
,
sample_rate
=
sample_rate
)
sound_librosa
=
sound
.
cpu
().
numpy
().
squeeze
()
spect_transform
=
torchaudio
.
transforms
.
Spectrogram
(
n_fft
=
n_fft
,
hop_length
=
hop_length
,
power
=
power
)
out_librosa
,
_
=
librosa
.
core
.
spectrum
.
_spectrogram
(
y
=
sound_librosa
,
n_fft
=
n_fft
,
hop_length
=
hop_length
,
power
=
power
)
melspect_transform
=
torchaudio
.
transforms
.
MelSpectrogram
(
sample_rate
=
sample_rate
,
window_fn
=
torch
.
hann_window
,
hop_length
=
hop_length
,
n_mels
=
n_mels
,
n_fft
=
n_fft
,
norm
=
norm
)
librosa_mel
=
librosa
.
feature
.
melspectrogram
(
y
=
sound_librosa
,
sr
=
sample_rate
,
n_fft
=
n_fft
,
hop_length
=
hop_length
,
n_mels
=
n_mels
,
htk
=
True
,
norm
=
norm
)
power_to_db_transform
=
torchaudio
.
transforms
.
AmplitudeToDB
(
'power'
,
80.
)
power_to_db_transform
=
torchaudio
.
transforms
.
AmplitudeToDB
(
'power'
,
80.
)
power_to_db_torch
=
power_to_db_transform
(
spect_transform
(
sound
)).
squeeze
().
cpu
()
power_to_db_torch
=
power_to_db_transform
(
spect_transform
(
sound
)).
squeeze
().
cpu
()
power_to_db_librosa
=
librosa
.
core
.
spectrum
.
power_to_db
(
out_librosa
)
power_to_db_librosa
=
librosa
.
core
.
spectrum
.
power_to_db
(
out_librosa
)
...
@@ -209,10 +252,19 @@ class TestTransforms(common_utils.TorchaudioTestCase):
...
@@ -209,10 +252,19 @@ class TestTransforms(common_utils.TorchaudioTestCase):
self
.
assertEqual
(
self
.
assertEqual
(
power_to_db_torch
.
type
(
db_librosa_tensor
.
dtype
),
db_librosa_tensor
,
atol
=
5e-3
,
rtol
=
1e-5
)
power_to_db_torch
.
type
(
db_librosa_tensor
.
dtype
),
db_librosa_tensor
,
atol
=
5e-3
,
rtol
=
1e-5
)
# test MFCC
@
parameterized
.
expand
([
melkwargs
=
{
'hop_length'
:
hop_length
,
'n_fft'
:
n_fft
}
param
(
n_fft
=
400
,
hop_length
=
200
,
n_mels
=
128
,
n_mfcc
=
40
),
mfcc_transform
=
torchaudio
.
transforms
.
MFCC
(
param
(
n_fft
=
600
,
hop_length
=
100
,
n_mels
=
128
,
n_mfcc
=
20
),
sample_rate
=
sample_rate
,
n_mfcc
=
n_mfcc
,
norm
=
'ortho'
,
melkwargs
=
melkwargs
)
param
(
n_fft
=
200
,
hop_length
=
50
,
n_mels
=
128
,
n_mfcc
=
50
),
])
def
test_mfcc
(
self
,
n_fft
,
hop_length
,
n_mels
,
n_mfcc
):
sample_rate
=
16000
sound
=
common_utils
.
get_sinusoid
(
n_channels
=
1
,
sample_rate
=
sample_rate
)
sound_librosa
=
sound
.
cpu
().
numpy
().
squeeze
()
librosa_mel
=
librosa
.
feature
.
melspectrogram
(
y
=
sound_librosa
,
sr
=
sample_rate
,
n_fft
=
n_fft
,
hop_length
=
hop_length
,
n_mels
=
n_mels
,
htk
=
True
,
norm
=
None
)
db_librosa
=
librosa
.
core
.
spectrum
.
power_to_db
(
librosa_mel
)
# librosa.feature.mfcc doesn't pass kwargs properly since some of the
# librosa.feature.mfcc doesn't pass kwargs properly since some of the
# kwargs for melspectrogram and mfcc are the same. We just follow the
# kwargs for melspectrogram and mfcc are the same. We just follow the
...
@@ -226,14 +278,24 @@ class TestTransforms(common_utils.TorchaudioTestCase):
...
@@ -226,14 +278,24 @@ class TestTransforms(common_utils.TorchaudioTestCase):
librosa_mfcc
=
scipy
.
fftpack
.
dct
(
db_librosa
,
axis
=
0
,
type
=
2
,
norm
=
'ortho'
)[:
n_mfcc
]
librosa_mfcc
=
scipy
.
fftpack
.
dct
(
db_librosa
,
axis
=
0
,
type
=
2
,
norm
=
'ortho'
)[:
n_mfcc
]
librosa_mfcc_tensor
=
torch
.
from_numpy
(
librosa_mfcc
)
librosa_mfcc_tensor
=
torch
.
from_numpy
(
librosa_mfcc
)
melkwargs
=
{
'hop_length'
:
hop_length
,
'n_fft'
:
n_fft
}
mfcc_transform
=
torchaudio
.
transforms
.
MFCC
(
sample_rate
=
sample_rate
,
n_mfcc
=
n_mfcc
,
norm
=
'ortho'
,
melkwargs
=
melkwargs
)
torch_mfcc
=
mfcc_transform
(
sound
).
squeeze
().
cpu
()
torch_mfcc
=
mfcc_transform
(
sound
).
squeeze
().
cpu
()
self
.
assertEqual
(
self
.
assertEqual
(
torch_mfcc
.
type
(
librosa_mfcc_tensor
.
dtype
),
librosa_mfcc_tensor
,
atol
=
5e-3
,
rtol
=
1e-5
)
torch_mfcc
.
type
(
librosa_mfcc_tensor
.
dtype
),
librosa_mfcc_tensor
,
atol
=
5e-3
,
rtol
=
1e-5
)
self
.
assert_compatibilities_spectral_centroid
(
sample_rate
,
n_fft
,
hop_length
,
sound
,
sound_librosa
)
@
parameterized
.
expand
([
param
(
n_fft
=
400
,
hop_length
=
200
),
def
assert_compatibilities_spectral_centroid
(
self
,
sample_rate
,
n_fft
,
hop_length
,
sound
,
sound_librosa
):
param
(
n_fft
=
600
,
hop_length
=
100
),
param
(
n_fft
=
200
,
hop_length
=
50
),
])
def
test_spectral_centroid
(
self
,
n_fft
,
hop_length
):
sample_rate
=
16000
sound
=
common_utils
.
get_sinusoid
(
n_channels
=
1
,
sample_rate
=
sample_rate
)
sound_librosa
=
sound
.
cpu
().
numpy
().
squeeze
()
spect_centroid
=
torchaudio
.
transforms
.
SpectralCentroid
(
spect_centroid
=
torchaudio
.
transforms
.
SpectralCentroid
(
sample_rate
=
sample_rate
,
n_fft
=
n_fft
,
hop_length
=
hop_length
)
sample_rate
=
sample_rate
,
n_fft
=
n_fft
,
hop_length
=
hop_length
)
out_torch
=
spect_centroid
(
sound
).
squeeze
().
cpu
()
out_torch
=
spect_centroid
(
sound
).
squeeze
().
cpu
()
...
@@ -244,52 +306,6 @@ class TestTransforms(common_utils.TorchaudioTestCase):
...
@@ -244,52 +306,6 @@ class TestTransforms(common_utils.TorchaudioTestCase):
self
.
assertEqual
(
out_torch
.
type
(
out_librosa
.
dtype
),
out_librosa
,
atol
=
1e-5
,
rtol
=
1e-5
)
self
.
assertEqual
(
out_torch
.
type
(
out_librosa
.
dtype
),
out_librosa
,
atol
=
1e-5
,
rtol
=
1e-5
)
def
test_basics1
(
self
):
kwargs
=
{
'n_fft'
:
400
,
'hop_length'
:
200
,
'power'
:
2.0
,
'n_mels'
:
128
,
'n_mfcc'
:
40
,
'sample_rate'
:
16000
}
self
.
assert_compatibilities
(
**
kwargs
)
def
test_basics2
(
self
):
kwargs
=
{
'n_fft'
:
600
,
'hop_length'
:
100
,
'power'
:
2.0
,
'n_mels'
:
128
,
'n_mfcc'
:
20
,
'sample_rate'
:
16000
}
self
.
assert_compatibilities
(
**
kwargs
)
# NOTE: Test passes offline, but fails on TravisCI (and CircleCI), see #372.
@
unittest
.
skipIf
(
'CI'
in
os
.
environ
,
'Test is known to fail on CI'
)
def
test_basics3
(
self
):
kwargs
=
{
'n_fft'
:
200
,
'hop_length'
:
50
,
'power'
:
2.0
,
'n_mels'
:
128
,
'n_mfcc'
:
50
,
'sample_rate'
:
24000
}
self
.
assert_compatibilities
(
**
kwargs
)
def
test_basics4
(
self
):
kwargs
=
{
'n_fft'
:
400
,
'hop_length'
:
200
,
'power'
:
3.0
,
'n_mels'
:
128
,
'n_mfcc'
:
40
,
'sample_rate'
:
16000
}
self
.
assert_compatibilities
(
**
kwargs
)
def
test_MelScale
(
self
):
def
test_MelScale
(
self
):
"""MelScale transform is comparable to that of librosa"""
"""MelScale transform is comparable to that of librosa"""
n_fft
=
2048
n_fft
=
2048
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment