Commit e5d567c9, authored by hwangjeff and committed by Facebook GitHub Bot
Browse files

Fix librosa calls (#2208)

Summary:
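Yesterday's release of librosa 0.9.0 made function arguments keyword-only and changed the default padding mode from "reflect" to zero padding for some functions. This PR adjusts the call sites in our tutorials and tests accordingly: arguments that were previously passed positionally are now passed by keyword, and `pad_mode="reflect"` is passed explicitly where the tests rely on the previous default.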

Pull Request resolved: https://github.com/pytorch/audio/pull/2208

Reviewed By: mthrok

Differential Revision: D34099793

Pulled By: hwangjeff

fbshipit-source-id: 4e2642cdda8aae6d0a928befaf1bbb3873d229bc
parent a1dc9e0a
......@@ -306,8 +306,8 @@ plot_mel_fbank(mel_filters, "Mel Filter Bank - torchaudio")
mel_filters_librosa = librosa.filters.mel(
sample_rate,
n_fft,
sr=sample_rate,
n_fft=n_fft,
n_mels=n_mels,
fmin=0.0,
fmax=sample_rate / 2.0,
......@@ -365,7 +365,7 @@ plot_spectrogram(melspec[0], title="MelSpectrogram - torchaudio", ylabel="mel fr
melspec_librosa = librosa.feature.melspectrogram(
waveform.numpy()[0],
y=waveform.numpy()[0],
sr=sample_rate,
n_fft=n_fft,
hop_length=hop_length,
......
......@@ -190,7 +190,7 @@ def benchmark_resample(
waveform_np = waveform.squeeze().numpy()
begin = time.time()
for _ in range(iters):
librosa.resample(waveform_np, sample_rate, resample_rate, res_type=librosa_type)
librosa.resample(waveform_np, orig_sr=sample_rate, target_sr=resample_rate, res_type=librosa_type)
elapsed = time.time() - begin
return elapsed / iters
......@@ -343,7 +343,7 @@ resampled_waveform = F.resample(
plot_sweep(resampled_waveform, resample_rate, title="Kaiser Window Best (torchaudio)")
librosa_resampled_waveform = torch.from_numpy(
librosa.resample(waveform.squeeze().numpy(), sample_rate, resample_rate, res_type="kaiser_best")
librosa.resample(waveform.squeeze().numpy(), orig_sr=sample_rate, target_sr=resample_rate, res_type="kaiser_best")
).unsqueeze(0)
plot_sweep(librosa_resampled_waveform, resample_rate, title="Kaiser Window Best (librosa)")
......@@ -363,7 +363,7 @@ resampled_waveform = F.resample(
plot_specgram(resampled_waveform, resample_rate, title="Kaiser Window Fast (torchaudio)")
librosa_resampled_waveform = torch.from_numpy(
librosa.resample(waveform.squeeze().numpy(), sample_rate, resample_rate, res_type="kaiser_fast")
librosa.resample(waveform.squeeze().numpy(), orig_sr=sample_rate, target_sr=resample_rate, res_type="kaiser_fast")
).unsqueeze(0)
plot_sweep(librosa_resampled_waveform, resample_rate, title="Kaiser Window Fast (librosa)")
......
......@@ -62,6 +62,7 @@ class Functional(TestBaseMixin):
momentum=momentum,
init=None,
length=waveform.size(1),
pad_mode="reflect",
)[None, ...]
self.assertEqual(result, torch.from_numpy(expected), atol=5e-5, rtol=1e-07)
......
......@@ -36,7 +36,7 @@ class TransformsTestBase(TestBaseMixin):
).to(self.device, self.dtype)
expected = librosa.core.spectrum._spectrogram(
y=waveform[0].cpu().numpy(), n_fft=n_fft, hop_length=hop_length, power=power
y=waveform[0].cpu().numpy(), n_fft=n_fft, hop_length=hop_length, power=power, pad_mode="reflect"
)[0]
result = T.Spectrogram(n_fft=n_fft, hop_length=hop_length, power=power,).to(self.device, self.dtype)(
......@@ -54,7 +54,7 @@ class TransformsTestBase(TestBaseMixin):
).to(self.device, self.dtype)
expected = librosa.core.spectrum._spectrogram(
y=waveform[0].cpu().numpy(), n_fft=n_fft, hop_length=hop_length, power=1
y=waveform[0].cpu().numpy(), n_fft=n_fft, hop_length=hop_length, power=1, pad_mode="reflect"
)[0]
result = T.Spectrogram(n_fft=n_fft, hop_length=hop_length, power=None, return_complex=True,).to(
......@@ -86,6 +86,7 @@ class TransformsTestBase(TestBaseMixin):
n_mels=n_mels,
norm=norm,
htk=mel_scale == "htk",
pad_mode="reflect",
)
result = T.MelSpectrogram(
sample_rate=sample_rate,
......@@ -136,6 +137,7 @@ class TransformsTestBase(TestBaseMixin):
n_mels=n_mels,
htk=True,
norm=None,
pad_mode="reflect",
)
expected = librosa.feature.mfcc(
S=librosa.core.spectrum.power_to_db(melspec), n_mfcc=n_mfcc, dct_type=2, norm="ortho"
......@@ -157,6 +159,6 @@ class TransformsTestBase(TestBaseMixin):
self.device, self.dtype
)(waveform)
expected = librosa.feature.spectral_centroid(
y=waveform[0].cpu().numpy(), sr=sample_rate, n_fft=n_fft, hop_length=hop_length
y=waveform[0].cpu().numpy(), sr=sample_rate, n_fft=n_fft, hop_length=hop_length, pad_mode="reflect"
)
self.assertEqual(result, torch.from_numpy(expected), atol=5e-4, rtol=1e-5)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment