Unverified Commit d58ac213 authored by George Harding's avatar George Harding Committed by GitHub
Browse files

Improve MelSpectrogram librosa compatibility test (#1267)

parent 9222e437
...@@ -7,7 +7,7 @@ import torch ...@@ -7,7 +7,7 @@ import torch
import torchaudio import torchaudio
import torchaudio.functional as F import torchaudio.functional as F
from torchaudio._internal.module_utils import is_module_available from torchaudio._internal.module_utils import is_module_available
from parameterized import parameterized from parameterized import parameterized, param
import itertools import itertools
LIBROSA_AVAILABLE = is_module_available('librosa') LIBROSA_AVAILABLE = is_module_available('librosa')
...@@ -165,13 +165,17 @@ def _load_audio_asset(*asset_paths, **kwargs): ...@@ -165,13 +165,17 @@ def _load_audio_asset(*asset_paths, **kwargs):
@unittest.skipIf(not LIBROSA_AVAILABLE, "Librosa not available") @unittest.skipIf(not LIBROSA_AVAILABLE, "Librosa not available")
class TestTransforms(common_utils.TorchaudioTestCase): class TestTransforms(common_utils.TorchaudioTestCase):
"""Test suite for functions in `transforms` module.""" """Test suite for functions in `transforms` module."""
def assert_compatibilities(self, n_fft, hop_length, power, n_mels, n_mfcc, sample_rate):
common_utils.set_audio_backend('default')
path = common_utils.get_asset_path('sinewave.wav')
sound, sample_rate = common_utils.load_wav(path)
sound_librosa = sound.cpu().numpy().squeeze() # (64000)
# test core spectrogram @parameterized.expand([
param(n_fft=400, hop_length=200, power=2.0),
param(n_fft=600, hop_length=100, power=2.0),
param(n_fft=400, hop_length=200, power=3.0),
param(n_fft=200, hop_length=50, power=2.0),
])
def test_spectrogram(self, n_fft, hop_length, power):
sample_rate = 16000
sound = common_utils.get_sinusoid(n_channels=1, sample_rate=sample_rate)
sound_librosa = sound.cpu().numpy().squeeze()
spect_transform = torchaudio.transforms.Spectrogram( spect_transform = torchaudio.transforms.Spectrogram(
n_fft=n_fft, hop_length=hop_length, power=power) n_fft=n_fft, hop_length=hop_length, power=power)
out_librosa, _ = librosa.core.spectrum._spectrogram( out_librosa, _ = librosa.core.spectrum._spectrogram(
...@@ -180,19 +184,58 @@ class TestTransforms(common_utils.TorchaudioTestCase): ...@@ -180,19 +184,58 @@ class TestTransforms(common_utils.TorchaudioTestCase):
out_torch = spect_transform(sound).squeeze().cpu() out_torch = spect_transform(sound).squeeze().cpu()
self.assertEqual(out_torch, torch.from_numpy(out_librosa), atol=1e-5, rtol=1e-5) self.assertEqual(out_torch, torch.from_numpy(out_librosa), atol=1e-5, rtol=1e-5)
# test mel spectrogram @parameterized.expand([
param(norm=norm, **p.kwargs)
for p in [
param(n_fft=400, hop_length=200, n_mels=128),
param(n_fft=600, hop_length=100, n_mels=128),
param(n_fft=200, hop_length=50, n_mels=128),
]
for norm in [None, 'slaney']
])
def test_mel_spectrogram(self, n_fft, hop_length, n_mels, norm):
sample_rate = 16000
sound = common_utils.get_sinusoid(n_channels=1, sample_rate=sample_rate)
sound_librosa = sound.cpu().numpy().squeeze()
melspect_transform = torchaudio.transforms.MelSpectrogram( melspect_transform = torchaudio.transforms.MelSpectrogram(
sample_rate=sample_rate, window_fn=torch.hann_window, sample_rate=sample_rate, window_fn=torch.hann_window,
hop_length=hop_length, n_mels=n_mels, n_fft=n_fft) hop_length=hop_length, n_mels=n_mels, n_fft=n_fft, norm=norm)
librosa_mel = librosa.feature.melspectrogram( librosa_mel = librosa.feature.melspectrogram(
y=sound_librosa, sr=sample_rate, n_fft=n_fft, y=sound_librosa, sr=sample_rate, n_fft=n_fft,
hop_length=hop_length, n_mels=n_mels, htk=True, norm=None) hop_length=hop_length, n_mels=n_mels, htk=True, norm=norm)
librosa_mel_tensor = torch.from_numpy(librosa_mel) librosa_mel_tensor = torch.from_numpy(librosa_mel)
torch_mel = melspect_transform(sound).squeeze().cpu() torch_mel = melspect_transform(sound).squeeze().cpu()
self.assertEqual( self.assertEqual(
torch_mel.type(librosa_mel_tensor.dtype), librosa_mel_tensor, atol=5e-3, rtol=1e-5) torch_mel.type(librosa_mel_tensor.dtype), librosa_mel_tensor, atol=5e-3, rtol=1e-5)
# test s2db @parameterized.expand([
param(norm=norm, **p.kwargs)
for p in [
param(n_fft=400, hop_length=200, power=2.0, n_mels=128),
param(n_fft=600, hop_length=100, power=2.0, n_mels=128),
param(n_fft=400, hop_length=200, power=3.0, n_mels=128),
# NOTE: Test passes offline, but fails on TravisCI (and CircleCI), see #372.
param(n_fft=200, hop_length=50, power=2.0, n_mels=128, skip_ci=True),
]
for norm in [None, 'slaney']
])
def test_s2db(self, n_fft, hop_length, power, n_mels, norm, skip_ci=False):
if skip_ci and 'CI' in os.environ:
self.skipTest('Test is known to fail on CI')
sample_rate = 16000
sound = common_utils.get_sinusoid(n_channels=1, sample_rate=sample_rate)
sound_librosa = sound.cpu().numpy().squeeze()
spect_transform = torchaudio.transforms.Spectrogram(
n_fft=n_fft, hop_length=hop_length, power=power)
out_librosa, _ = librosa.core.spectrum._spectrogram(
y=sound_librosa, n_fft=n_fft, hop_length=hop_length, power=power)
melspect_transform = torchaudio.transforms.MelSpectrogram(
sample_rate=sample_rate, window_fn=torch.hann_window,
hop_length=hop_length, n_mels=n_mels, n_fft=n_fft, norm=norm)
librosa_mel = librosa.feature.melspectrogram(
y=sound_librosa, sr=sample_rate, n_fft=n_fft,
hop_length=hop_length, n_mels=n_mels, htk=True, norm=norm)
power_to_db_transform = torchaudio.transforms.AmplitudeToDB('power', 80.) power_to_db_transform = torchaudio.transforms.AmplitudeToDB('power', 80.)
power_to_db_torch = power_to_db_transform(spect_transform(sound)).squeeze().cpu() power_to_db_torch = power_to_db_transform(spect_transform(sound)).squeeze().cpu()
power_to_db_librosa = librosa.core.spectrum.power_to_db(out_librosa) power_to_db_librosa = librosa.core.spectrum.power_to_db(out_librosa)
...@@ -209,10 +252,19 @@ class TestTransforms(common_utils.TorchaudioTestCase): ...@@ -209,10 +252,19 @@ class TestTransforms(common_utils.TorchaudioTestCase):
self.assertEqual( self.assertEqual(
power_to_db_torch.type(db_librosa_tensor.dtype), db_librosa_tensor, atol=5e-3, rtol=1e-5) power_to_db_torch.type(db_librosa_tensor.dtype), db_librosa_tensor, atol=5e-3, rtol=1e-5)
# test MFCC @parameterized.expand([
melkwargs = {'hop_length': hop_length, 'n_fft': n_fft} param(n_fft=400, hop_length=200, n_mels=128, n_mfcc=40),
mfcc_transform = torchaudio.transforms.MFCC( param(n_fft=600, hop_length=100, n_mels=128, n_mfcc=20),
sample_rate=sample_rate, n_mfcc=n_mfcc, norm='ortho', melkwargs=melkwargs) param(n_fft=200, hop_length=50, n_mels=128, n_mfcc=50),
])
def test_mfcc(self, n_fft, hop_length, n_mels, n_mfcc):
sample_rate = 16000
sound = common_utils.get_sinusoid(n_channels=1, sample_rate=sample_rate)
sound_librosa = sound.cpu().numpy().squeeze()
librosa_mel = librosa.feature.melspectrogram(
y=sound_librosa, sr=sample_rate, n_fft=n_fft,
hop_length=hop_length, n_mels=n_mels, htk=True, norm=None)
db_librosa = librosa.core.spectrum.power_to_db(librosa_mel)
# librosa.feature.mfcc doesn't pass kwargs properly since some of the # librosa.feature.mfcc doesn't pass kwargs properly since some of the
# kwargs for melspectrogram and mfcc are the same. We just follow the # kwargs for melspectrogram and mfcc are the same. We just follow the
...@@ -226,14 +278,24 @@ class TestTransforms(common_utils.TorchaudioTestCase): ...@@ -226,14 +278,24 @@ class TestTransforms(common_utils.TorchaudioTestCase):
librosa_mfcc = scipy.fftpack.dct(db_librosa, axis=0, type=2, norm='ortho')[:n_mfcc] librosa_mfcc = scipy.fftpack.dct(db_librosa, axis=0, type=2, norm='ortho')[:n_mfcc]
librosa_mfcc_tensor = torch.from_numpy(librosa_mfcc) librosa_mfcc_tensor = torch.from_numpy(librosa_mfcc)
melkwargs = {'hop_length': hop_length, 'n_fft': n_fft}
mfcc_transform = torchaudio.transforms.MFCC(
sample_rate=sample_rate, n_mfcc=n_mfcc, norm='ortho', melkwargs=melkwargs)
torch_mfcc = mfcc_transform(sound).squeeze().cpu() torch_mfcc = mfcc_transform(sound).squeeze().cpu()
self.assertEqual( self.assertEqual(
torch_mfcc.type(librosa_mfcc_tensor.dtype), librosa_mfcc_tensor, atol=5e-3, rtol=1e-5) torch_mfcc.type(librosa_mfcc_tensor.dtype), librosa_mfcc_tensor, atol=5e-3, rtol=1e-5)
self.assert_compatibilities_spectral_centroid(sample_rate, n_fft, hop_length, sound, sound_librosa) @parameterized.expand([
param(n_fft=400, hop_length=200),
def assert_compatibilities_spectral_centroid(self, sample_rate, n_fft, hop_length, sound, sound_librosa): param(n_fft=600, hop_length=100),
param(n_fft=200, hop_length=50),
])
def test_spectral_centroid(self, n_fft, hop_length):
sample_rate = 16000
sound = common_utils.get_sinusoid(n_channels=1, sample_rate=sample_rate)
sound_librosa = sound.cpu().numpy().squeeze()
spect_centroid = torchaudio.transforms.SpectralCentroid( spect_centroid = torchaudio.transforms.SpectralCentroid(
sample_rate=sample_rate, n_fft=n_fft, hop_length=hop_length) sample_rate=sample_rate, n_fft=n_fft, hop_length=hop_length)
out_torch = spect_centroid(sound).squeeze().cpu() out_torch = spect_centroid(sound).squeeze().cpu()
...@@ -244,52 +306,6 @@ class TestTransforms(common_utils.TorchaudioTestCase): ...@@ -244,52 +306,6 @@ class TestTransforms(common_utils.TorchaudioTestCase):
self.assertEqual(out_torch.type(out_librosa.dtype), out_librosa, atol=1e-5, rtol=1e-5) self.assertEqual(out_torch.type(out_librosa.dtype), out_librosa, atol=1e-5, rtol=1e-5)
def test_basics1(self):
kwargs = {
'n_fft': 400,
'hop_length': 200,
'power': 2.0,
'n_mels': 128,
'n_mfcc': 40,
'sample_rate': 16000
}
self.assert_compatibilities(**kwargs)
def test_basics2(self):
kwargs = {
'n_fft': 600,
'hop_length': 100,
'power': 2.0,
'n_mels': 128,
'n_mfcc': 20,
'sample_rate': 16000
}
self.assert_compatibilities(**kwargs)
# NOTE: Test passes offline, but fails on TravisCI (and CircleCI), see #372.
@unittest.skipIf('CI' in os.environ, 'Test is known to fail on CI')
def test_basics3(self):
kwargs = {
'n_fft': 200,
'hop_length': 50,
'power': 2.0,
'n_mels': 128,
'n_mfcc': 50,
'sample_rate': 24000
}
self.assert_compatibilities(**kwargs)
def test_basics4(self):
kwargs = {
'n_fft': 400,
'hop_length': 200,
'power': 3.0,
'n_mels': 128,
'n_mfcc': 40,
'sample_rate': 16000
}
self.assert_compatibilities(**kwargs)
def test_MelScale(self): def test_MelScale(self):
"""MelScale transform is comparable to that of librosa""" """MelScale transform is comparable to that of librosa"""
n_fft = 2048 n_fft = 2048
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment