Unverified Commit 9a0e70ea authored by moto, committed by GitHub

Run functional librosa compatibility test on CUDA as well (#1436)

parent 52e2943c
@@ -2,6 +2,7 @@ from .data_utils import (
     get_asset_path,
     get_whitenoise,
     get_sinusoid,
+    get_spectrogram,
 )
 from .backend_utils import (
     set_audio_backend,
@@ -30,8 +31,28 @@ from .parameterized_utils import (
     nested_params
 )
 
-__all__ = ['get_asset_path', 'get_whitenoise', 'get_sinusoid', 'set_audio_backend',
-           'TempDirMixin', 'HttpServerMixin', 'TestBaseMixin', 'PytorchTestCase', 'TorchaudioTestCase',
-           'skipIfNoCuda', 'skipIfNoExec', 'skipIfNoModule', 'skipIfNoKaldi', 'skipIfNoSox',
-           'skipIfNoSoxBackend', 'skipIfRocm', 'get_wav_data', 'normalize_wav', 'load_wav', 'save_wav',
-           'load_params', 'nested_params']
+__all__ = [
+    'get_asset_path',
+    'get_whitenoise',
+    'get_sinusoid',
+    'get_spectrogram',
+    'set_audio_backend',
+    'TempDirMixin',
+    'HttpServerMixin',
+    'TestBaseMixin',
+    'PytorchTestCase',
+    'TorchaudioTestCase',
+    'skipIfNoCuda',
+    'skipIfNoExec',
+    'skipIfNoModule',
+    'skipIfNoKaldi',
+    'skipIfNoSox',
+    'skipIfNoSoxBackend',
+    'skipIfRocm',
+    'get_wav_data',
+    'normalize_wav',
+    'load_wav',
+    'save_wav',
+    'load_params',
+    'nested_params',
+]
 import os.path
-from typing import Union
+from typing import Union, Optional
 
 import torch
@@ -62,7 +62,7 @@ def get_whitenoise(
     """
     if isinstance(dtype, str):
        dtype = getattr(torch, dtype)
-    if dtype not in [torch.float32, torch.int32, torch.int16, torch.uint8]:
+    if dtype not in [torch.float64, torch.float32, torch.int32, torch.int16, torch.uint8]:
         raise NotImplementedError(f'dtype {dtype} is not supported.')
     # According to the doc, forking rng on all CUDA devices is slow when there are many CUDA devices,
     # so we only fork on CPU, generate values and move the data to the given device
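The comment above refers to sampling on CPU under a forked RNG state and only then moving the result to the target device. A minimal sketch of that pattern, with the helper name, seed, and shapes chosen purely for illustration (not part of the commit):

    import torch

    def rand_on_cpu_then_move(shape, seed, device):
        # Fork only the CPU RNG state; forking the RNG of every CUDA device
        # is slow when many devices are present.
        with torch.random.fork_rng(devices=[]):
            torch.manual_seed(seed)
            values = torch.randn(shape, dtype=torch.float64)
        # Move the generated values to the requested device afterwards.
        return values.to(device)

    noise = rand_on_cpu_then_move((1, 8000), seed=0, device='cpu')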
@@ -110,3 +110,43 @@ def get_sinusoid(
     if not channels_first:
         tensor = tensor.t()
     return convert_tensor_encoding(tensor, dtype)
+
+
+def get_spectrogram(
+        waveform,
+        *,
+        n_fft: int = 2048,
+        hop_length: Optional[int] = None,
+        win_length: Optional[int] = None,
+        window: Optional[torch.Tensor] = None,
+        center: bool = True,
+        pad_mode: str = 'reflect',
+        power: Optional[float] = None,
+):
+    """Generate a spectrogram of the given Tensor.
+
+    Args:
+        n_fft: The number of FFT bins.
+        hop_length: Stride for sliding window. Default: ``n_fft // 4``.
+        win_length: The size of window frame and STFT filter. Default: ``n_fft``.
+        window: Window function. Default: Hann window.
+        center: Pad the input sequence if True. See ``torch.stft`` for the detail.
+        pad_mode: Padding method used when center is True. Default: ``"reflect"``.
+        power: If ``None``, the raw spectrogram with complex values is returned,
+            otherwise the norm of the spectrogram is returned.
+    """
+    hop_length = hop_length or n_fft // 4
+    win_length = win_length or n_fft
+    window = torch.hann_window(win_length) if window is None else window
+    spec = torch.stft(
+        waveform,
+        n_fft=n_fft,
+        hop_length=hop_length,
+        win_length=win_length,
+        center=center,
+        window=window,
+        pad_mode=pad_mode,
+        return_complex=True)
+    if power is not None:
+        spec = spec.abs() ** power
+    return spec
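For reference, a small usage sketch of the helper added above; the waveform shape and parameters here are illustrative assumptions, not taken from the commit:

    import torch

    # A (channel, time) float64 waveform, as produced by the other test helpers.
    waveform = torch.randn(1, 16000, dtype=torch.float64)

    complex_spec = get_spectrogram(waveform, n_fft=400)         # complex STFT, shape (1, 201, frames)
    magnitude = get_spectrogram(waveform, n_fft=400, power=1)   # |STFT|
    power_spec = get_spectrogram(waveform, n_fft=400, power=2)  # |STFT| squared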
@@ -11,17 +11,41 @@ def load_params(*paths):
         return [param(json.loads(line)) for line in file]
 
 
-def nested_params(*params):
-    def _name_func(func, _, params):
-        strs = []
-        for arg in params.args:
-            if isinstance(arg, tuple):
-                strs.append("_".join(str(a) for a in arg))
-            else:
-                strs.append(str(arg))
-        return f'{func.__name__}_{"_".join(strs)}'
-
-    return parameterized.expand(
-        list(product(*params)),
-        name_func=_name_func
-    )
+def _name_func(func, _, params):
+    strs = []
+    for arg in params.args:
+        if isinstance(arg, tuple):
+            strs.append("_".join(str(a) for a in arg))
+        else:
+            strs.append(str(arg))
+    return f'{func.__name__}_{"_".join(strs)}'
+
+
+def nested_params(*params_set):
+    """Generate the cartesian product of the given lists of parameters.
+
+    Args:
+        params_set (list of parameters): Parameters. When using the ``parameterized.param`` class,
+            all the parameters have to be specified with that class, using kwargs only.
+    """
+    flatten = [p for params in params_set for p in params]
+
+    # Parameters to be nested are given as lists of plain objects
+    if all(not isinstance(p, param) for p in flatten):
+        args = list(product(*params_set))
+        return parameterized.expand(args, name_func=_name_func)
+
+    # Parameters to be nested are given as lists of `parameterized.param`
+    if not all(isinstance(p, param) for p in flatten):
+        raise TypeError(
+            "When using ``parameterized.param``, "
+            "all the parameters have to be of the ``param`` type.")
+    if any(p.args for p in flatten):
+        raise ValueError(
+            "When using ``parameterized.param``, "
+            "all the parameters have to be provided as keyword arguments."
+        )
+    args = [param()]
+    for params in params_set:
+        args = [param(**x.kwargs, **y.kwargs) for x in args for y in params]
+    return parameterized.expand(args)
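A brief sketch of how the two accepted forms of nested_params behave when applied to a test class; the class and test names below are hypothetical, not part of the diff:

    import unittest
    from parameterized import param
    from torchaudio_unittest.common_utils import nested_params

    class NestedParamsExample(unittest.TestCase):
        # Plain values: positional cartesian product, yielding four tests
        # for (1, 'a'), (1, 'b'), (2, 'a'), (2, 'b').
        @nested_params([1, 2], ['a', 'b'])
        def test_plain(self, n, s):
            self.assertIn(n, (1, 2))

        # ``param`` form: the kwargs of one param from each set are merged,
        # so every generated test receives both ``norm`` and ``mel_scale``.
        @nested_params(
            [param(norm=None), param(norm='slaney')],
            [param(mel_scale='htk'), param(mel_scale='slaney')],
        )
        def test_param(self, norm, mel_scale):
            self.assertIn(mel_scale, ('htk', 'slaney'))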
from torchaudio_unittest.common_utils import PytorchTestCase
from .librosa_compatibility_test_impl import Functional, FunctionalComplex


class TestFunctionalCPU(Functional, PytorchTestCase):
    device = 'cpu'


class TestFunctionalComplexCPU(FunctionalComplex, PytorchTestCase):
    device = 'cpu'
from torchaudio_unittest.common_utils import PytorchTestCase, skipIfNoCuda
from .librosa_compatibility_test_impl import Functional, FunctionalComplex


@skipIfNoCuda
class TestFunctionalCUDA(Functional, PytorchTestCase):
    device = 'cuda'


@skipIfNoCuda
class TestFunctionalComplexCUDA(FunctionalComplex, PytorchTestCase):
    device = 'cuda'
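The two files above follow the usual torchaudio test layout: Functional and FunctionalComplex hold the test bodies but are not TestCase subclasses themselves, so unittest does not collect them directly; the thin CPU and CUDA classes mix in PytorchTestCase and pin the device attribute the tests read. A rough, hypothetical sketch of the same pattern:

    import unittest
    import torch

    class _DeviceMixinSketch:
        device = None  # subclasses pin 'cpu' or 'cuda'

    class _ImplSketch(_DeviceMixinSketch):
        # Not a TestCase, so it is not collected on its own.
        def test_zeros_on_device(self):
            x = torch.zeros(3, device=self.device)
            self.assertEqual(x.device.type, self.device)

    class SketchCPU(_ImplSketch, unittest.TestCase):
        device = 'cpu'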
@@ -2,7 +2,7 @@ import unittest
 from distutils.version import StrictVersion
 
 import torch
-from parameterized import parameterized, param
+from parameterized import param
 
 import torchaudio.functional as F
 from torchaudio._internal.module_utils import is_module_available
@@ -13,44 +13,58 @@ if LIBROSA_AVAILABLE:
     import numpy as np
     import librosa
 
-from torchaudio_unittest import common_utils
 from torchaudio_unittest.common_utils import (
+    TestBaseMixin,
     nested_params,
+    get_whitenoise,
+    get_spectrogram,
 )
 
 
 @unittest.skipIf(not LIBROSA_AVAILABLE, "Librosa not available")
-class TestFunctional(common_utils.TorchaudioTestCase):
+class Functional(TestBaseMixin):
     """Test suite for functions in `functional` module."""
+    dtype = torch.float64
 
-    def test_griffinlim(self):
-        # NOTE: This test is flaky without a fixed random seed
-        # See https://github.com/pytorch/audio/issues/382
-        torch.random.manual_seed(42)
-        tensor = torch.rand((1, 1000))
-
+    @nested_params([0, 0.99])
+    def test_griffinlim(self, momentum):
+        # FFT params
         n_fft = 400
-        ws = 400
-        hop = 100
-        window = torch.hann_window(ws)
-        normalize = False
-        momentum = 0.99
+        win_length = n_fft
+        hop_length = n_fft // 4
+        window = torch.hann_window(win_length)
+        power = 1
+        # GriffinLim params
         n_iter = 8
-        length = 1000
-        rand_init = False
-        init = 'random' if rand_init else None
 
-        specgram = F.spectrogram(tensor, 0, window, n_fft, hop, ws, 2, normalize).sqrt()
-        ta_out = F.griffinlim(specgram, window, n_fft, hop, ws, 1,
-                              n_iter, momentum, length, rand_init)
-        lr_out = librosa.griffinlim(specgram.squeeze(0).numpy(), n_iter=n_iter, hop_length=hop,
-                                    momentum=momentum, init=init, length=length)
-        lr_out = torch.from_numpy(lr_out).unsqueeze(0)
-
-        self.assertEqual(ta_out, lr_out, atol=5e-5, rtol=1e-5)
-
-    @parameterized.expand([
-        param(norm=norm, mel_scale=mel_scale, **p.kwargs)
-        for p in [
+        waveform = get_whitenoise(device=self.device, dtype=self.dtype)
+        specgram = get_spectrogram(
+            waveform, n_fft=n_fft, hop_length=hop_length, power=power,
+            win_length=win_length, window=window)
+
+        result = F.griffinlim(
+            specgram,
+            window=window,
+            n_fft=n_fft,
+            hop_length=hop_length,
+            win_length=win_length,
+            power=power,
+            n_iter=n_iter,
+            momentum=momentum,
+            length=waveform.size(1),
+            rand_init=False)
+        expected = librosa.griffinlim(
+            specgram[0].cpu().numpy(),
+            n_iter=n_iter,
+            hop_length=hop_length,
+            momentum=momentum,
+            init=None,
+            length=waveform.size(1))[None, ...]
+        self.assertEqual(result, torch.from_numpy(expected), atol=5e-5, rtol=1e-07)
+
+    @nested_params(
+        [
             param(),
             param(n_mels=128, sample_rate=44100),
             param(n_mels=128, fmin=2000.0, fmax=5000.0),
@@ -58,62 +72,60 @@ class TestFunctional(common_utils.TorchaudioTestCase):
             param(n_mels=56, fmin=800.0, fmax=900.0),
             param(n_mels=56, fmin=1900.0, fmax=900.0),
             param(n_mels=10, fmin=1900.0, fmax=900.0),
-        ]
-        for norm in [None, 'slaney']
-        for mel_scale in ['htk', 'slaney']
-    ])
+        ],
+        [param(norm=n) for n in [None, 'slaney']],
+        [param(mel_scale=s) for s in ['htk', 'slaney']],
+    )
     def test_create_fb(self, n_mels=40, sample_rate=22050, n_fft=2048,
                        fmin=0.0, fmax=8000.0, norm=None, mel_scale="htk"):
         if (norm == "slaney" and StrictVersion(librosa.__version__) < StrictVersion("0.7.2")):
             self.skipTest('Test is known to fail with older versions of librosa.')
-
-        librosa_fb = librosa.filters.mel(sr=sample_rate,
-                                         n_fft=n_fft,
-                                         n_mels=n_mels,
-                                         fmax=fmax,
-                                         fmin=fmin,
-                                         htk=mel_scale == "htk",
-                                         norm=norm)
-        fb = F.create_fb_matrix(sample_rate=sample_rate,
-                                n_mels=n_mels,
-                                f_max=fmax,
-                                f_min=fmin,
-                                n_freqs=(n_fft // 2 + 1),
-                                norm=norm,
-                                mel_scale=mel_scale)
-
-        for i_mel_bank in range(n_mels):
-            self.assertEqual(
-                fb[:, i_mel_bank], torch.tensor(librosa_fb[i_mel_bank]), atol=1e-4, rtol=1e-5)
+        if self.device != 'cpu':
+            self.skipTest('No need to run this test on CUDA')
+
+        expected = librosa.filters.mel(
+            sr=sample_rate,
+            n_fft=n_fft,
+            n_mels=n_mels,
+            fmax=fmax,
+            fmin=fmin,
+            htk=mel_scale == "htk",
+            norm=norm).T
+        result = F.create_fb_matrix(
+            sample_rate=sample_rate,
+            n_mels=n_mels,
+            f_max=fmax,
+            f_min=fmin,
+            n_freqs=(n_fft // 2 + 1),
+            norm=norm,
+            mel_scale=mel_scale)
+        self.assertEqual(result, torch.from_numpy(expected), atol=7e-5, rtol=1.3e-6)
 
-    def test_amplitude_to_DB(self):
-        spec = torch.rand((6, 201))
-
+    def test_amplitude_to_DB_power(self):
         amin = 1e-10
         db_multiplier = 0.0
         top_db = 80.0
-
-        # Power to DB
         multiplier = 10.0
 
-        ta_out = F.amplitude_to_DB(spec, multiplier, amin, db_multiplier, top_db)
-        lr_out = librosa.core.power_to_db(spec.numpy())
-        lr_out = torch.from_numpy(lr_out)
-
-        self.assertEqual(ta_out, lr_out, atol=5e-5, rtol=1e-5)
-
-        # Amplitude to DB
+        spec = get_spectrogram(get_whitenoise(device=self.device, dtype=self.dtype), power=2)
+        result = F.amplitude_to_DB(spec, multiplier, amin, db_multiplier, top_db)
+        expected = librosa.core.power_to_db(spec[0].cpu().numpy())[None, ...]
+        self.assertEqual(result, torch.from_numpy(expected))
+
+    def test_amplitude_to_DB(self):
+        amin = 1e-10
+        db_multiplier = 0.0
+        top_db = 80.0
         multiplier = 20.0
 
-        ta_out = F.amplitude_to_DB(spec, multiplier, amin, db_multiplier, top_db)
-        lr_out = librosa.core.amplitude_to_db(spec.numpy())
-        lr_out = torch.from_numpy(lr_out)
-
-        self.assertEqual(ta_out, lr_out, atol=5e-5, rtol=1e-5)
+        spec = get_spectrogram(get_whitenoise(device=self.device, dtype=self.dtype), power=1)
+        result = F.amplitude_to_DB(spec, multiplier, amin, db_multiplier, top_db)
+        expected = librosa.core.amplitude_to_db(spec[0].cpu().numpy())[None, ...]
+        self.assertEqual(result, torch.from_numpy(expected))
 
 
 @unittest.skipIf(not LIBROSA_AVAILABLE, "Librosa not available")
-class TestFunctionalComplex(common_utils.TorchaudioTestCase):
+class FunctionalComplex(TestBaseMixin):
     @nested_params(
         [0.5, 1.01, 1.3],
         [True, False],
@@ -127,11 +139,12 @@ class TestFunctionalComplex(common_utils.TorchaudioTestCase):
         # Due to cumulative sum, numerical error in using torch.float32 will
         # result in bottom right values of the stretched spectrogram to not
         # match with librosa.
-        spec = torch.randn(num_freq, num_frames, dtype=torch.complex128)
+        spec = torch.randn(num_freq, num_frames, device=self.device, dtype=torch.complex128)
         phase_advance = torch.linspace(
             0,
             np.pi * hop_length,
             num_freq,
+            device=self.device,
             dtype=torch.float64)[..., None]
 
         stretched = F.phase_vocoder(
@@ -139,7 +152,7 @@ class TestFunctionalComplex(common_utils.TorchaudioTestCase):
             rate=rate, phase_advance=phase_advance)
 
         expected_stretched = librosa.phase_vocoder(
-            spec.numpy(),
+            spec.cpu().numpy(),
             rate=rate,
             hop_length=hop_length)