Commit e5d567c9 authored by hwangjeff, committed by Facebook GitHub Bot
Browse files

Fix librosa calls (#2208)

Summary:
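Yesterday's release of librosa 0.9.0 made arguments keyword-only and changed the default padding mode from "reflect" to zero-padding (`pad_mode="constant"`) for some functions. This PR adjusts call sites in our tutorials and tests accordingly, passing `pad_mode="reflect"` explicitly where the tests compare against torchaudio's reflect-padded output.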

Pull Request resolved: https://github.com/pytorch/audio/pull/2208

Reviewed By: mthrok

Differential Revision: D34099793

Pulled By: hwangjeff

fbshipit-source-id: 4e2642cdda8aae6d0a928befaf1bbb3873d229bc
parent a1dc9e0a
...@@ -306,8 +306,8 @@ plot_mel_fbank(mel_filters, "Mel Filter Bank - torchaudio") ...@@ -306,8 +306,8 @@ plot_mel_fbank(mel_filters, "Mel Filter Bank - torchaudio")
mel_filters_librosa = librosa.filters.mel( mel_filters_librosa = librosa.filters.mel(
sample_rate, sr=sample_rate,
n_fft, n_fft=n_fft,
n_mels=n_mels, n_mels=n_mels,
fmin=0.0, fmin=0.0,
fmax=sample_rate / 2.0, fmax=sample_rate / 2.0,
...@@ -365,7 +365,7 @@ plot_spectrogram(melspec[0], title="MelSpectrogram - torchaudio", ylabel="mel fr ...@@ -365,7 +365,7 @@ plot_spectrogram(melspec[0], title="MelSpectrogram - torchaudio", ylabel="mel fr
melspec_librosa = librosa.feature.melspectrogram( melspec_librosa = librosa.feature.melspectrogram(
waveform.numpy()[0], y=waveform.numpy()[0],
sr=sample_rate, sr=sample_rate,
n_fft=n_fft, n_fft=n_fft,
hop_length=hop_length, hop_length=hop_length,
......
...@@ -190,7 +190,7 @@ def benchmark_resample( ...@@ -190,7 +190,7 @@ def benchmark_resample(
waveform_np = waveform.squeeze().numpy() waveform_np = waveform.squeeze().numpy()
begin = time.time() begin = time.time()
for _ in range(iters): for _ in range(iters):
librosa.resample(waveform_np, sample_rate, resample_rate, res_type=librosa_type) librosa.resample(waveform_np, orig_sr=sample_rate, target_sr=resample_rate, res_type=librosa_type)
elapsed = time.time() - begin elapsed = time.time() - begin
return elapsed / iters return elapsed / iters
...@@ -343,7 +343,7 @@ resampled_waveform = F.resample( ...@@ -343,7 +343,7 @@ resampled_waveform = F.resample(
plot_sweep(resampled_waveform, resample_rate, title="Kaiser Window Best (torchaudio)") plot_sweep(resampled_waveform, resample_rate, title="Kaiser Window Best (torchaudio)")
librosa_resampled_waveform = torch.from_numpy( librosa_resampled_waveform = torch.from_numpy(
librosa.resample(waveform.squeeze().numpy(), sample_rate, resample_rate, res_type="kaiser_best") librosa.resample(waveform.squeeze().numpy(), orig_sr=sample_rate, target_sr=resample_rate, res_type="kaiser_best")
).unsqueeze(0) ).unsqueeze(0)
plot_sweep(librosa_resampled_waveform, resample_rate, title="Kaiser Window Best (librosa)") plot_sweep(librosa_resampled_waveform, resample_rate, title="Kaiser Window Best (librosa)")
...@@ -363,7 +363,7 @@ resampled_waveform = F.resample( ...@@ -363,7 +363,7 @@ resampled_waveform = F.resample(
plot_specgram(resampled_waveform, resample_rate, title="Kaiser Window Fast (torchaudio)") plot_specgram(resampled_waveform, resample_rate, title="Kaiser Window Fast (torchaudio)")
librosa_resampled_waveform = torch.from_numpy( librosa_resampled_waveform = torch.from_numpy(
librosa.resample(waveform.squeeze().numpy(), sample_rate, resample_rate, res_type="kaiser_fast") librosa.resample(waveform.squeeze().numpy(), orig_sr=sample_rate, target_sr=resample_rate, res_type="kaiser_fast")
).unsqueeze(0) ).unsqueeze(0)
plot_sweep(librosa_resampled_waveform, resample_rate, title="Kaiser Window Fast (librosa)") plot_sweep(librosa_resampled_waveform, resample_rate, title="Kaiser Window Fast (librosa)")
......
...@@ -62,6 +62,7 @@ class Functional(TestBaseMixin): ...@@ -62,6 +62,7 @@ class Functional(TestBaseMixin):
momentum=momentum, momentum=momentum,
init=None, init=None,
length=waveform.size(1), length=waveform.size(1),
pad_mode="reflect",
)[None, ...] )[None, ...]
self.assertEqual(result, torch.from_numpy(expected), atol=5e-5, rtol=1e-07) self.assertEqual(result, torch.from_numpy(expected), atol=5e-5, rtol=1e-07)
......
...@@ -36,7 +36,7 @@ class TransformsTestBase(TestBaseMixin): ...@@ -36,7 +36,7 @@ class TransformsTestBase(TestBaseMixin):
).to(self.device, self.dtype) ).to(self.device, self.dtype)
expected = librosa.core.spectrum._spectrogram( expected = librosa.core.spectrum._spectrogram(
y=waveform[0].cpu().numpy(), n_fft=n_fft, hop_length=hop_length, power=power y=waveform[0].cpu().numpy(), n_fft=n_fft, hop_length=hop_length, power=power, pad_mode="reflect"
)[0] )[0]
result = T.Spectrogram(n_fft=n_fft, hop_length=hop_length, power=power,).to(self.device, self.dtype)( result = T.Spectrogram(n_fft=n_fft, hop_length=hop_length, power=power,).to(self.device, self.dtype)(
...@@ -54,7 +54,7 @@ class TransformsTestBase(TestBaseMixin): ...@@ -54,7 +54,7 @@ class TransformsTestBase(TestBaseMixin):
).to(self.device, self.dtype) ).to(self.device, self.dtype)
expected = librosa.core.spectrum._spectrogram( expected = librosa.core.spectrum._spectrogram(
y=waveform[0].cpu().numpy(), n_fft=n_fft, hop_length=hop_length, power=1 y=waveform[0].cpu().numpy(), n_fft=n_fft, hop_length=hop_length, power=1, pad_mode="reflect"
)[0] )[0]
result = T.Spectrogram(n_fft=n_fft, hop_length=hop_length, power=None, return_complex=True,).to( result = T.Spectrogram(n_fft=n_fft, hop_length=hop_length, power=None, return_complex=True,).to(
...@@ -86,6 +86,7 @@ class TransformsTestBase(TestBaseMixin): ...@@ -86,6 +86,7 @@ class TransformsTestBase(TestBaseMixin):
n_mels=n_mels, n_mels=n_mels,
norm=norm, norm=norm,
htk=mel_scale == "htk", htk=mel_scale == "htk",
pad_mode="reflect",
) )
result = T.MelSpectrogram( result = T.MelSpectrogram(
sample_rate=sample_rate, sample_rate=sample_rate,
...@@ -136,6 +137,7 @@ class TransformsTestBase(TestBaseMixin): ...@@ -136,6 +137,7 @@ class TransformsTestBase(TestBaseMixin):
n_mels=n_mels, n_mels=n_mels,
htk=True, htk=True,
norm=None, norm=None,
pad_mode="reflect",
) )
expected = librosa.feature.mfcc( expected = librosa.feature.mfcc(
S=librosa.core.spectrum.power_to_db(melspec), n_mfcc=n_mfcc, dct_type=2, norm="ortho" S=librosa.core.spectrum.power_to_db(melspec), n_mfcc=n_mfcc, dct_type=2, norm="ortho"
...@@ -157,6 +159,6 @@ class TransformsTestBase(TestBaseMixin): ...@@ -157,6 +159,6 @@ class TransformsTestBase(TestBaseMixin):
self.device, self.dtype self.device, self.dtype
)(waveform) )(waveform)
expected = librosa.feature.spectral_centroid( expected = librosa.feature.spectral_centroid(
y=waveform[0].cpu().numpy(), sr=sample_rate, n_fft=n_fft, hop_length=hop_length y=waveform[0].cpu().numpy(), sr=sample_rate, n_fft=n_fft, hop_length=hop_length, pad_mode="reflect"
) )
self.assertEqual(result, torch.from_numpy(expected), atol=5e-4, rtol=1e-5) self.assertEqual(result, torch.from_numpy(expected), atol=5e-4, rtol=1e-5)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment