Unverified Commit d11ad6bb authored by engineerchuan's avatar engineerchuan Committed by GitHub
Browse files

Stop using whitenoise.wav, mp3 and torchaudio.load in sox effect test

Part of #764

 - Replace `whitenoise.wav` with on-the-fly data generation
 - Replace `torchaudio.load` with `common_utils.load_wav`
 - Replace `steam-train-whistle-daniel_simon.mp3` with `.wav`
parent 4b3e9052
...@@ -21,9 +21,10 @@ def get_whitenoise( ...@@ -21,9 +21,10 @@ def get_whitenoise(
seed: int = 0, seed: int = 0,
dtype: Union[str, torch.dtype] = "float32", dtype: Union[str, torch.dtype] = "float32",
device: Union[str, torch.device] = "cpu", device: Union[str, torch.device] = "cpu",
channels_first=True,
scale_factor: float = 1,
): ):
"""Generate pseudo audio data with whitenoise """Generate pseudo audio data with whitenoise
Args: Args:
sample_rate: Sampling rate sample_rate: Sampling rate
duration: Length of the resulting Tensor in seconds. duration: Length of the resulting Tensor in seconds.
...@@ -32,19 +33,34 @@ def get_whitenoise( ...@@ -32,19 +33,34 @@ def get_whitenoise(
Note that this function does not modify global random generator state. Note that this function does not modify global random generator state.
dtype: Torch dtype dtype: Torch dtype
device: device device: device
channels_first: whether first dimension is n_channels
scale_factor: scale the Tensor before clamping and quantization
Returns: Returns:
Tensor: shape of (n_channels, sample_rate * duration) Tensor: shape of (n_channels, sample_rate * duration)
""" """
if isinstance(dtype, str): if isinstance(dtype, str):
dtype = getattr(torch, dtype) dtype = getattr(torch, dtype)
shape = [n_channels, sample_rate * duration] if dtype not in [torch.float32, torch.int32, torch.int16, torch.uint8]:
raise NotImplementedError(f'dtype {dtype} is not supported.')
# According to the doc, folking rng on all CUDA devices is slow when there are many CUDA devices, # According to the doc, folking rng on all CUDA devices is slow when there are many CUDA devices,
# so we only folk on CPU, generate values and move the data to the given device # so we only folk on CPU, generate values and move the data to the given device
with torch.random.fork_rng([]): with torch.random.fork_rng([]):
torch.random.manual_seed(seed) torch.random.manual_seed(seed)
tensor = torch.randn(shape, dtype=dtype, device='cpu') tensor = torch.randn([sample_rate * duration], dtype=torch.float32, device='cpu')
tensor /= 2.0 tensor /= 2.0
tensor *= scale_factor
tensor.clamp_(-1.0, 1.0) tensor.clamp_(-1.0, 1.0)
if dtype == torch.int32:
tensor *= (tensor > 0) * 2147483647 + (tensor < 0) * 2147483648
if dtype == torch.int16:
tensor *= (tensor > 0) * 32767 + (tensor < 0) * 32768
if dtype == torch.uint8:
tensor *= (tensor > 0) * 127 + (tensor < 0) * 128
tensor += 128
tensor = tensor.to(dtype)
tensor = tensor.repeat([n_channels, 1])
if not channels_first:
tensor = tensor.t()
return tensor.to(device=device) return tensor.to(device=device)
......
...@@ -9,12 +9,30 @@ from . import common_utils ...@@ -9,12 +9,30 @@ from . import common_utils
@common_utils.skipIfNoSoxBackend @common_utils.skipIfNoSoxBackend
class TestFunctionalFiltering(common_utils.TorchaudioTestCase): class TestFunctionalFiltering(common_utils.TempDirMixin, common_utils.TorchaudioTestCase):
backend = 'sox' backend = 'sox'
def setUp(self):
# 1. Create int16 signal to save as PCM wav
# 2. Write to temp file
# 3. Load temp file into tensor to reuse in downstream tests
# Prefer to use common_utils.load_wav() but this implementation does
# not match torchaudio.load and errors on downstream tests
super().setUp()
self.NOISE_SAMPLE_RATE = 44100 # N.B. 44.1 kHz required by SoX deemph effect
noise_waveform_as_int = common_utils.get_whitenoise(
sample_rate=self.NOISE_SAMPLE_RATE, duration=5, dtype=torch.int16, scale_factor=0.9,
)
self.noise_filepath = self.get_temp_path("whitenoise.wav")
common_utils.save_wav(
self.noise_filepath, noise_waveform_as_int, self.NOISE_SAMPLE_RATE
)
self.noise_waveform, _ = torchaudio.load(self.noise_filepath, normalization=True)
def test_gain(self): def test_gain(self):
test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav') test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
waveform, _ = torchaudio.load(test_filepath) waveform, _ = common_utils.load_wav(test_filepath)
waveform_gain = F.gain(waveform, 3) waveform_gain = F.gain(waveform, 3)
self.assertTrue(waveform_gain.abs().max().item(), 1.) self.assertTrue(waveform_gain.abs().max().item(), 1.)
...@@ -28,7 +46,7 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -28,7 +46,7 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
def test_dither(self): def test_dither(self):
test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav') test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
waveform, _ = torchaudio.load(test_filepath) waveform, _ = common_utils.load_wav(test_filepath)
waveform_dithered = F.dither(waveform) waveform_dithered = F.dither(waveform)
waveform_dithered_noiseshaped = F.dither(waveform, noise_shaping=True) waveform_dithered_noiseshaped = F.dither(waveform, noise_shaping=True)
...@@ -48,7 +66,7 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -48,7 +66,7 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
def test_vctk_transform_pipeline(self): def test_vctk_transform_pipeline(self):
test_filepath_vctk = common_utils.get_asset_path('VCTK-Corpus', 'wav48', 'p224', 'p224_002.wav') test_filepath_vctk = common_utils.get_asset_path('VCTK-Corpus', 'wav48', 'p224', 'p224_002.wav')
wf_vctk, sr_vctk = torchaudio.load(test_filepath_vctk) wf_vctk, sr_vctk = common_utils.load_wav(test_filepath_vctk)
# rate # rate
sample = T.Resample(sr_vctk, 16000, resampling_method='sinc_interpolation') sample = T.Resample(sr_vctk, 16000, resampling_method='sinc_interpolation')
...@@ -71,17 +89,14 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -71,17 +89,14 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
""" """
Test biquad lowpass filter, compare to SoX implementation Test biquad lowpass filter, compare to SoX implementation
""" """
cutoff_freq = 3000 cutoff_freq = 3000
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("lowpass", [cutoff_freq]) E.append_effect_to_chain("lowpass", [cutoff_freq])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.lowpass_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE, cutoff_freq)
output_waveform = F.lowpass_biquad(waveform, sample_rate, cutoff_freq)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -89,37 +104,30 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -89,37 +104,30 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
""" """
Test biquad highpass filter, compare to SoX implementation Test biquad highpass filter, compare to SoX implementation
""" """
cutoff_freq = 2000 cutoff_freq = 2000
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("highpass", [cutoff_freq]) E.append_effect_to_chain("highpass", [cutoff_freq])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.highpass_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE, cutoff_freq)
output_waveform = F.highpass_biquad(waveform, sample_rate, cutoff_freq)
# TBD - this fails at the 1e-4 level, debug why self.assertEqual(output_waveform, sox_output_waveform, atol=1.5e-3, rtol=1e-5)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-3, rtol=1e-5)
def test_allpass(self): def test_allpass(self):
""" """
Test biquad allpass filter, compare to SoX implementation Test biquad allpass filter, compare to SoX implementation
""" """
central_freq = 1000 central_freq = 1000
q = 0.707 q = 0.707
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("allpass", [central_freq, str(q) + 'q']) E.append_effect_to_chain("allpass", [central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.allpass_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE, central_freq, q)
output_waveform = F.allpass_biquad(waveform, sample_rate, central_freq, q)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -127,19 +135,17 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -127,19 +135,17 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
""" """
Test biquad bandpass filter, compare to SoX implementation Test biquad bandpass filter, compare to SoX implementation
""" """
central_freq = 1000 central_freq = 1000
q = 0.707 q = 0.707
const_skirt_gain = True const_skirt_gain = True
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("bandpass", ["-c", central_freq, str(q) + 'q']) E.append_effect_to_chain("bandpass", ["-c", central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.bandpass_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE,
output_waveform = F.bandpass_biquad(waveform, sample_rate, central_freq, q, const_skirt_gain) central_freq, q, const_skirt_gain)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -147,19 +153,17 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -147,19 +153,17 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
""" """
Test biquad bandpass filter, compare to SoX implementation Test biquad bandpass filter, compare to SoX implementation
""" """
central_freq = 1000 central_freq = 1000
q = 0.707 q = 0.707
const_skirt_gain = False const_skirt_gain = False
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("bandpass", [central_freq, str(q) + 'q']) E.append_effect_to_chain("bandpass", [central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.bandpass_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE,
output_waveform = F.bandpass_biquad(waveform, sample_rate, central_freq, q, const_skirt_gain) central_freq, q, const_skirt_gain)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -167,18 +171,16 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -167,18 +171,16 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
""" """
Test biquad bandreject filter, compare to SoX implementation Test biquad bandreject filter, compare to SoX implementation
""" """
central_freq = 1000 central_freq = 1000
q = 0.707 q = 0.707
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("bandreject", [central_freq, str(q) + 'q']) E.append_effect_to_chain("bandreject", [central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.bandreject_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE,
output_waveform = F.bandreject_biquad(waveform, sample_rate, central_freq, q) central_freq, q)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -186,19 +188,16 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -186,19 +188,16 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
""" """
Test biquad band filter with noise mode, compare to SoX implementation Test biquad band filter with noise mode, compare to SoX implementation
""" """
central_freq = 1000 central_freq = 1000
q = 0.707 q = 0.707
noise = True noise = True
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("band", ["-n", central_freq, str(q) + 'q']) E.append_effect_to_chain("band", ["-n", central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.band_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE, central_freq, q, noise)
output_waveform = F.band_biquad(waveform, sample_rate, central_freq, q, noise)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -211,14 +210,12 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -211,14 +210,12 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
q = 0.707 q = 0.707
noise = False noise = False
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("band", [central_freq, str(q) + 'q']) E.append_effect_to_chain("band", [central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.band_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE, central_freq, q, noise)
output_waveform = F.band_biquad(waveform, sample_rate, central_freq, q, noise)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -231,14 +228,12 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -231,14 +228,12 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
q = 0.707 q = 0.707
gain = 40 gain = 40
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("treble", [gain, central_freq, str(q) + 'q']) E.append_effect_to_chain("treble", [gain, central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.treble_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE, gain, central_freq, q)
output_waveform = F.treble_biquad(waveform, sample_rate, gain, central_freq, q)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -251,14 +246,12 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -251,14 +246,12 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
q = 0.707 q = 0.707
gain = 40 gain = 40
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("bass", [gain, central_freq, str(q) + 'q']) E.append_effect_to_chain("bass", [gain, central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.bass_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE, gain, central_freq, q)
output_waveform = F.bass_biquad(waveform, sample_rate, gain, central_freq, q)
self.assertEqual(output_waveform, sox_output_waveform, atol=1.5e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1.5e-4, rtol=1e-5)
...@@ -266,15 +259,12 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -266,15 +259,12 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
""" """
Test biquad deemph filter, compare to SoX implementation Test biquad deemph filter, compare to SoX implementation
""" """
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("deemph") E.append_effect_to_chain("deemph")
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.deemph_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE)
output_waveform = F.deemph_biquad(waveform, sample_rate)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -282,15 +272,12 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -282,15 +272,12 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
""" """
Test biquad riaa filter, compare to SoX implementation Test biquad riaa filter, compare to SoX implementation
""" """
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("riaa") E.append_effect_to_chain("riaa")
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.riaa_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE)
output_waveform = F.riaa_biquad(waveform, sample_rate)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -299,14 +286,13 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -299,14 +286,13 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
Test contrast effect, compare to SoX implementation Test contrast effect, compare to SoX implementation
""" """
enhancement_amount = 80. enhancement_amount = 80.
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("contrast", [enhancement_amount]) E.append_effect_to_chain("contrast", [enhancement_amount])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.contrast(self.noise_waveform, enhancement_amount)
output_waveform = F.contrast(waveform, enhancement_amount)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -316,14 +302,13 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -316,14 +302,13 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
""" """
shift = 0.5 shift = 0.5
limiter_gain = 0.05 limiter_gain = 0.05
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("dcshift", [shift, limiter_gain]) E.append_effect_to_chain("dcshift", [shift, limiter_gain])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, _ = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.dcshift(self.noise_waveform, shift, limiter_gain)
output_waveform = F.dcshift(waveform, shift, limiter_gain)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -332,14 +317,13 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -332,14 +317,13 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
Test dcshift effect, compare to SoX implementation Test dcshift effect, compare to SoX implementation
""" """
shift = 0.6 shift = 0.6
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("dcshift", [shift]) E.append_effect_to_chain("dcshift", [shift])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, _ = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.dcshift(self.noise_waveform, shift)
output_waveform = F.dcshift(waveform, shift)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -349,14 +333,13 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -349,14 +333,13 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
""" """
gain = 30 gain = 30
colour = 40 colour = 40
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("overdrive", [gain, colour]) E.append_effect_to_chain("overdrive", [gain, colour])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, _ = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.overdrive(self.noise_waveform, gain, colour)
output_waveform = F.overdrive(waveform, gain, colour)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -369,14 +352,14 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -369,14 +352,14 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
delay_ms = 2.0 delay_ms = 2.0
decay = 0.4 decay = 0.4
speed = 0.5 speed = 0.5
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("phaser", [gain_in, gain_out, delay_ms, decay, speed, "-s"]) E.append_effect_to_chain("phaser", [gain_in, gain_out, delay_ms, decay, speed, "-s"])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.phaser(self.noise_waveform, self.NOISE_SAMPLE_RATE,
output_waveform = F.phaser(waveform, sample_rate, gain_in, gain_out, delay_ms, decay, speed, sinusoidal=True) gain_in, gain_out, delay_ms, decay, speed, sinusoidal=True)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -389,14 +372,14 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -389,14 +372,14 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
delay_ms = 2.0 delay_ms = 2.0
decay = 0.4 decay = 0.4
speed = 0.5 speed = 0.5
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("phaser", [gain_in, gain_out, delay_ms, decay, speed, "-t"]) E.append_effect_to_chain("phaser", [gain_in, gain_out, delay_ms, decay, speed, "-t"])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.phaser(self.noise_waveform, self.NOISE_SAMPLE_RATE,
output_waveform = F.phaser(waveform, sample_rate, gain_in, gain_out, delay_ms, decay, speed, sinusoidal=False) gain_in, gain_out, delay_ms, decay, speed, sinusoidal=False)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -410,15 +393,14 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -410,15 +393,14 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
width = 0.9 width = 0.9
speed = 0.5 speed = 0.5
phase = 30 phase = 30
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("flanger", [delay, depth, regen, width, speed, "triangle", phase, "linear"]) E.append_effect_to_chain("flanger", [delay, depth, regen, width, speed, "triangle", phase, "linear"])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.flanger(self.noise_waveform, self.NOISE_SAMPLE_RATE, delay, depth, regen,
output_waveform = F.flanger(waveform, sample_rate, delay, depth, regen, width, speed, phase, width, speed, phase, modulation='triangular', interpolation='linear')
modulation='triangular', interpolation='linear')
torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -432,15 +414,14 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -432,15 +414,14 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
width = 0.4 width = 0.4
speed = 0.5 speed = 0.5
phase = 40 phase = 40
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("flanger", [delay, depth, regen, width, speed, "triangle", phase, "quadratic"]) E.append_effect_to_chain("flanger", [delay, depth, regen, width, speed, "triangle", phase, "quadratic"])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.flanger(self.noise_waveform, self.NOISE_SAMPLE_RATE, delay, depth,
output_waveform = F.flanger(waveform, sample_rate, delay, depth, regen, width, speed, phase, regen, width, speed, phase, modulation='triangular', interpolation='quadratic')
modulation='triangular', interpolation='quadratic')
torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -454,15 +435,14 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -454,15 +435,14 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
width = 0.23 width = 0.23
speed = 1.3 speed = 1.3
phase = 60 phase = 60
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("flanger", [delay, depth, regen, width, speed, "sine", phase, "linear"]) E.append_effect_to_chain("flanger", [delay, depth, regen, width, speed, "sine", phase, "linear"])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.flanger(self.noise_waveform, self.NOISE_SAMPLE_RATE, delay, depth,
output_waveform = F.flanger(waveform, sample_rate, delay, depth, regen, width, speed, phase, regen, width, speed, phase, modulation='sinusoidal', interpolation='linear')
modulation='sinusoidal', interpolation='linear')
torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -476,15 +456,14 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -476,15 +456,14 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
width = 0.23 width = 0.23
speed = 1.3 speed = 1.3
phase = 25 phase = 25
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("flanger", [delay, depth, regen, width, speed, "sine", phase, "quadratic"]) E.append_effect_to_chain("flanger", [delay, depth, regen, width, speed, "sine", phase, "quadratic"])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.flanger(self.noise_waveform, self.NOISE_SAMPLE_RATE, delay, depth,
output_waveform = F.flanger(waveform, sample_rate, delay, depth, regen, width, speed, phase, regen, width, speed, phase, modulation='sinusoidal', interpolation='quadratic')
modulation='sinusoidal', interpolation='quadratic')
torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
...@@ -497,21 +476,17 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -497,21 +476,17 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
q = 0.707 q = 0.707
gain = 1 gain = 1
noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("equalizer", [center_freq, q, gain]) E.append_effect_to_chain("equalizer", [center_freq, q, gain])
sox_output_waveform, sr = E.sox_build_flow_effects() sox_output_waveform, sr = E.sox_build_flow_effects()
waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True) output_waveform = F.equalizer_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE, center_freq, gain, q)
output_waveform = F.equalizer_biquad(waveform, sample_rate, center_freq, gain, q)
self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5) self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
def test_perf_biquad_filtering(self): def test_perf_biquad_filtering(self):
fn_sine = common_utils.get_asset_path('whitenoise.wav')
b0 = 0.4 b0 = 0.4
b1 = 0.2 b1 = 0.2
b2 = 0.9 b2 = 0.9
...@@ -521,13 +496,12 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase): ...@@ -521,13 +496,12 @@ class TestFunctionalFiltering(common_utils.TorchaudioTestCase):
# SoX method # SoX method
E = torchaudio.sox_effects.SoxEffectsChain() E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(fn_sine) E.set_input_file(self.noise_filepath)
E.append_effect_to_chain("biquad", [b0, b1, b2, a0, a1, a2]) E.append_effect_to_chain("biquad", [b0, b1, b2, a0, a1, a2])
waveform_sox_out, _ = E.sox_build_flow_effects() waveform_sox_out, _ = E.sox_build_flow_effects()
waveform, _ = torchaudio.load(fn_sine, normalization=True)
waveform_lfilter_out = F.lfilter( waveform_lfilter_out = F.lfilter(
waveform, torch.tensor([a0, a1, a2]), torch.tensor([b0, b1, b2]) self.noise_waveform, torch.tensor([a0, a1, a2]), torch.tensor([b0, b1, b2])
) )
self.assertEqual(waveform_lfilter_out, waveform_sox_out, atol=1e-4, rtol=1e-5) self.assertEqual(waveform_lfilter_out, waveform_sox_out, atol=1e-4, rtol=1e-5)
......
...@@ -11,7 +11,7 @@ from . import common_utils ...@@ -11,7 +11,7 @@ from . import common_utils
class Test_SoxEffectsChain(common_utils.TorchaudioTestCase): class Test_SoxEffectsChain(common_utils.TorchaudioTestCase):
backend = 'sox' backend = 'sox'
test_filepath = common_utils.get_asset_path("steam-train-whistle-daniel_simon.mp3") test_filepath = common_utils.get_asset_path("steam-train-whistle-daniel_simon.wav")
def test_single_channel(self): def test_single_channel(self):
fn_sine = common_utils.get_asset_path("sinewave.wav") fn_sine = common_utils.get_asset_path("sinewave.wav")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment