Unverified commit 995b75f8, authored by Vincent QB and committed by GitHub
Browse files

add slaney normalization (#589)

* add slaney normalization.

* add torchscript.

* convert to string for torchscript compatibility.

* flake8.

* use string as default.
parent adb3d3da
...@@ -53,19 +53,20 @@ class TestFunctional(_LibrosaMixin, unittest.TestCase): ...@@ -53,19 +53,20 @@ class TestFunctional(_LibrosaMixin, unittest.TestCase):
torch.testing.assert_allclose(ta_out, lr_out, atol=5e-5, rtol=1e-5) torch.testing.assert_allclose(ta_out, lr_out, atol=5e-5, rtol=1e-5)
def _test_create_fb(self, n_mels=40, sample_rate=22050, n_fft=2048, fmin=0.0, fmax=8000.0): def _test_create_fb(self, n_mels=40, sample_rate=22050, n_fft=2048, fmin=0.0, fmax=8000.0, norm=None):
librosa_fb = librosa.filters.mel(sr=sample_rate, librosa_fb = librosa.filters.mel(sr=sample_rate,
n_fft=n_fft, n_fft=n_fft,
n_mels=n_mels, n_mels=n_mels,
fmax=fmax, fmax=fmax,
fmin=fmin, fmin=fmin,
htk=True, htk=True,
norm=None) norm=norm)
fb = F.create_fb_matrix(sample_rate=sample_rate, fb = F.create_fb_matrix(sample_rate=sample_rate,
n_mels=n_mels, n_mels=n_mels,
f_max=fmax, f_max=fmax,
f_min=fmin, f_min=fmin,
n_freqs=(n_fft // 2 + 1)) n_freqs=(n_fft // 2 + 1),
norm=norm)
for i_mel_bank in range(n_mels): for i_mel_bank in range(n_mels):
torch.testing.assert_allclose(fb[:, i_mel_bank], torch.tensor(librosa_fb[i_mel_bank]), torch.testing.assert_allclose(fb[:, i_mel_bank], torch.tensor(librosa_fb[i_mel_bank]),
...@@ -79,6 +80,12 @@ class TestFunctional(_LibrosaMixin, unittest.TestCase): ...@@ -79,6 +80,12 @@ class TestFunctional(_LibrosaMixin, unittest.TestCase):
self._test_create_fb(n_mels=56, fmin=800.0, fmax=900.0) self._test_create_fb(n_mels=56, fmin=800.0, fmax=900.0)
self._test_create_fb(n_mels=56, fmin=1900.0, fmax=900.0) self._test_create_fb(n_mels=56, fmin=1900.0, fmax=900.0)
self._test_create_fb(n_mels=10, fmin=1900.0, fmax=900.0) self._test_create_fb(n_mels=10, fmin=1900.0, fmax=900.0)
self._test_create_fb(n_mels=128, sample_rate=44100, norm="slaney")
self._test_create_fb(n_mels=128, fmin=2000.0, fmax=5000.0, norm="slaney")
self._test_create_fb(n_mels=56, fmin=100.0, fmax=9000.0, norm="slaney")
self._test_create_fb(n_mels=56, fmin=800.0, fmax=900.0, norm="slaney")
self._test_create_fb(n_mels=56, fmin=1900.0, fmax=900.0, norm="slaney")
self._test_create_fb(n_mels=10, fmin=1900.0, fmax=900.0, norm="slaney")
def test_amplitude_to_DB(self): def test_amplitude_to_DB(self):
spec = torch.rand((6, 201)) spec = torch.rand((6, 201))
......
...@@ -96,7 +96,8 @@ class Functional(common_utils.TestBaseMixin): ...@@ -96,7 +96,8 @@ class Functional(common_utils.TestBaseMixin):
f_max = 20.0 f_max = 20.0
n_mels = 10 n_mels = 10
sample_rate = 16000 sample_rate = 16000
return F.create_fb_matrix(n_stft, f_min, f_max, n_mels, sample_rate) norm = ""
return F.create_fb_matrix(n_stft, f_min, f_max, n_mels, sample_rate, norm)
dummy = torch.zeros(1, 1) dummy = torch.zeros(1, 1)
self._assert_consistency(func, dummy) self._assert_consistency(func, dummy)
......
...@@ -335,7 +335,8 @@ def create_fb_matrix( ...@@ -335,7 +335,8 @@ def create_fb_matrix(
f_min: float, f_min: float,
f_max: float, f_max: float,
n_mels: int, n_mels: int,
sample_rate: int sample_rate: int,
norm: str = "",
) -> Tensor: ) -> Tensor:
r"""Create a frequency bin conversion matrix. r"""Create a frequency bin conversion matrix.
...@@ -345,6 +346,8 @@ def create_fb_matrix( ...@@ -345,6 +346,8 @@ def create_fb_matrix(
f_max (float): Maximum frequency (Hz) f_max (float): Maximum frequency (Hz)
n_mels (int): Number of mel filterbanks n_mels (int): Number of mel filterbanks
sample_rate (int): Sample rate of the audio waveform sample_rate (int): Sample rate of the audio waveform
norm (str): If 'slaney', divide the triangular mel weights by the width of the mel band
(area normalization). (Default: '')
Returns: Returns:
Tensor: Triangular filter banks (fb matrix) of size (``n_freqs``, ``n_mels``) Tensor: Triangular filter banks (fb matrix) of size (``n_freqs``, ``n_mels``)
...@@ -372,6 +375,12 @@ def create_fb_matrix( ...@@ -372,6 +375,12 @@ def create_fb_matrix(
down_slopes = (-1.0 * slopes[:, :-2]) / f_diff[:-1] # (n_freqs, n_mels) down_slopes = (-1.0 * slopes[:, :-2]) / f_diff[:-1] # (n_freqs, n_mels)
up_slopes = slopes[:, 2:] / f_diff[1:] # (n_freqs, n_mels) up_slopes = slopes[:, 2:] / f_diff[1:] # (n_freqs, n_mels)
fb = torch.max(zero, torch.min(down_slopes, up_slopes)) fb = torch.max(zero, torch.min(down_slopes, up_slopes))
if norm == "slaney":
# Slaney-style mel is scaled to be approx constant energy per channel
enorm = 2.0 / (f_pts[2:n_mels + 2] - f_pts[:n_mels])
fb *= enorm.unsqueeze(0)
return fb return fb
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment