test_sox_compatibility.py 16.5 KB
Newer Older
1
import os
2
3
import unittest

4
5
6
import torch
import torchaudio
import torchaudio.functional as F
7
8
import torchaudio.transforms as T

9
from common_utils import AudioBackendScope, BACKENDS, create_temp_assets_dir
10
11
12


class TestFunctionalFiltering(unittest.TestCase):
13
    test_dirpath, test_dir = create_temp_assets_dir()
14

15
    def _test_lfilter_basic(self, dtype, device):
16
17
18
19
20
21
22
        """
        Create a very basic signal,
        Then make a simple 4th order delay
        The output should be same as the input but shifted
        """

        torch.random.manual_seed(42)
23
24
25
        waveform = torch.rand(2, 44100 * 1, dtype=dtype, device=device)
        b_coeffs = torch.tensor([0, 0, 0, 1], dtype=dtype, device=device)
        a_coeffs = torch.tensor([1, 0, 0, 0], dtype=dtype, device=device)
26
27
        output_waveform = F.lfilter(waveform, a_coeffs, b_coeffs)

28
        torch.testing.assert_allclose(output_waveform[:, 3:], waveform[:, 0:-3], atol=1e-5, rtol=1e-5)
29

30
31
32
33
34
35
    def test_lfilter_basic(self):
        self._test_lfilter_basic(torch.float32, torch.device("cpu"))

    def test_lfilter_basic_double(self):
        self._test_lfilter_basic(torch.float64, torch.device("cpu"))

moto's avatar
moto committed
36
    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
37
    def test_lfilter_basic_gpu(self):
moto's avatar
moto committed
38
        self._test_lfilter_basic(torch.float32, torch.device("cuda:0"))
39
40

    def _test_lfilter(self, waveform, device):
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
        """
        Design an IIR lowpass filter using scipy.signal filter design
        https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.iirdesign.html#scipy.signal.iirdesign

        Example
          >>> from scipy.signal import iirdesign
          >>> b, a = iirdesign(0.2, 0.3, 1, 60)
        """

        b_coeffs = torch.tensor(
            [
                0.00299893,
                -0.0051152,
                0.00841964,
                -0.00747802,
                0.00841964,
                -0.0051152,
                0.00299893,
59
60
            ],
            device=device,
61
62
63
64
65
66
67
68
69
70
        )
        a_coeffs = torch.tensor(
            [
                1.0,
                -4.8155751,
                10.2217618,
                -12.14481273,
                8.49018171,
                -3.3066882,
                0.56088705,
71
72
            ],
            device=device,
73
74
75
76
77
78
79
        )

        output_waveform = F.lfilter(waveform, a_coeffs, b_coeffs)
        assert len(output_waveform.size()) == 2
        assert output_waveform.size(0) == waveform.size(0)
        assert output_waveform.size(1) == waveform.size(1)

80
81
    def test_lfilter(self):

82
        filepath = os.path.join(self.test_dirpath, "assets", "whitenoise.wav")
83
84
85
86
        waveform, _ = torchaudio.load(filepath, normalization=True)

        self._test_lfilter(waveform, torch.device("cpu"))

moto's avatar
moto committed
87
    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
88
    def test_lfilter_gpu(self):
moto's avatar
moto committed
89
90
91
92
93
        filepath = os.path.join(self.test_dirpath, "assets", "whitenoise.wav")
        waveform, _ = torchaudio.load(filepath, normalization=True)
        cuda0 = torch.device("cuda:0")
        cuda_waveform = waveform.cuda(device=cuda0)
        self._test_lfilter(cuda_waveform, cuda0)
94

95
96
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
97
98
99
100
101
102
103
104
105
106
107
108
    def test_gain(self):
        test_filepath = os.path.join(self.test_dirpath, "assets", "steam-train-whistle-daniel_simon.wav")
        waveform, _ = torchaudio.load(test_filepath)

        waveform_gain = F.gain(waveform, 3)
        self.assertTrue(waveform_gain.abs().max().item(), 1.)

        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(test_filepath)
        E.append_effect_to_chain("gain", [3])
        sox_gain_waveform = E.sox_build_flow_effects()[0]

109
        torch.testing.assert_allclose(waveform_gain, sox_gain_waveform, atol=1e-04, rtol=1e-5)
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_dither(self):
        test_filepath = os.path.join(self.test_dirpath, "assets", "steam-train-whistle-daniel_simon.wav")
        waveform, _ = torchaudio.load(test_filepath)

        waveform_dithered = F.dither(waveform)
        waveform_dithered_noiseshaped = F.dither(waveform, noise_shaping=True)

        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(test_filepath)
        E.append_effect_to_chain("dither", [])
        sox_dither_waveform = E.sox_build_flow_effects()[0]

125
        torch.testing.assert_allclose(waveform_dithered, sox_dither_waveform, atol=1e-04, rtol=1e-5)
126
127
128
129
130
        E.clear_chain()

        E.append_effect_to_chain("dither", ["-s"])
        sox_dither_waveform_ns = E.sox_build_flow_effects()[0]

131
        torch.testing.assert_allclose(waveform_dithered_noiseshaped, sox_dither_waveform_ns, atol=1e-02, rtol=1e-5)
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_vctk_transform_pipeline(self):
        test_filepath_vctk = os.path.join(self.test_dirpath, "assets/VCTK-Corpus/wav48/p224/", "p224_002.wav")
        wf_vctk, sr_vctk = torchaudio.load(test_filepath_vctk)

        # rate
        sample = T.Resample(sr_vctk, 16000, resampling_method='sinc_interpolation')
        wf_vctk = sample(wf_vctk)
        # dither
        wf_vctk = F.dither(wf_vctk, noise_shaping=True)

        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(test_filepath_vctk)
        E.append_effect_to_chain("gain", ["-h"])
        E.append_effect_to_chain("channels", [1])
        E.append_effect_to_chain("rate", [16000])
        E.append_effect_to_chain("gain", ["-rh"])
        E.append_effect_to_chain("dither", ["-s"])
        wf_vctk_sox = E.sox_build_flow_effects()[0]

154
        torch.testing.assert_allclose(wf_vctk, wf_vctk_sox, rtol=1e-03, atol=1e-03)
155
156
157

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
158
159
160
161
162
163
164
165
    def test_lowpass(self):

        """
        Test biquad lowpass filter, compare to SoX implementation
        """

        CUTOFF_FREQ = 3000

166
        noise_filepath = os.path.join(self.test_dirpath, "assets", "whitenoise.wav")
167
168
169
170
171
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("lowpass", [CUTOFF_FREQ])
        sox_output_waveform, sr = E.sox_build_flow_effects()

172
        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
173
174
        output_waveform = F.lowpass_biquad(waveform, sample_rate, CUTOFF_FREQ)

175
        torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
176

177
178
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
179
180
181
182
183
184
185
    def test_highpass(self):
        """
        Test biquad highpass filter, compare to SoX implementation
        """

        CUTOFF_FREQ = 2000

186
        noise_filepath = os.path.join(self.test_dirpath, "assets", "whitenoise.wav")
187
188
189
190
191
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("highpass", [CUTOFF_FREQ])
        sox_output_waveform, sr = E.sox_build_flow_effects()

192
        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
193
194
195
        output_waveform = F.highpass_biquad(waveform, sample_rate, CUTOFF_FREQ)

        # TBD - this fails at the 1e-4 level, debug why
196
        torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-3, rtol=1e-5)
197

198
199
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
moto's avatar
moto committed
200
201
202
203
204
205
206
207
    def test_allpass(self):
        """
        Test biquad allpass filter, compare to SoX implementation
        """

        CENTRAL_FREQ = 1000
        Q = 0.707

208
        noise_filepath = os.path.join(self.test_dirpath, "assets", "whitenoise.wav")
moto's avatar
moto committed
209
210
211
212
213
214
215
216
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("allpass", [CENTRAL_FREQ, str(Q) + 'q'])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.allpass_biquad(waveform, sample_rate, CENTRAL_FREQ, Q)

217
        torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
218

219
220
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
221
222
223
224
225
226
227
228
229
    def test_bandpass_with_csg(self):
        """
        Test biquad bandpass filter, compare to SoX implementation
        """

        CENTRAL_FREQ = 1000
        Q = 0.707
        CONST_SKIRT_GAIN = True

230
        noise_filepath = os.path.join(self.test_dirpath, "assets", "whitenoise.wav")
231
232
233
234
235
236
237
238
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("bandpass", ["-c", CENTRAL_FREQ, str(Q) + 'q'])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.bandpass_biquad(waveform, sample_rate, CENTRAL_FREQ, Q, CONST_SKIRT_GAIN)

239
        torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
240

241
242
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
243
244
245
246
247
248
249
250
251
    def test_bandpass_without_csg(self):
        """
        Test biquad bandpass filter, compare to SoX implementation
        """

        CENTRAL_FREQ = 1000
        Q = 0.707
        CONST_SKIRT_GAIN = False

252
        noise_filepath = os.path.join(self.test_dirpath, "assets", "whitenoise.wav")
253
254
255
256
257
258
259
260
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("bandpass", [CENTRAL_FREQ, str(Q) + 'q'])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.bandpass_biquad(waveform, sample_rate, CENTRAL_FREQ, Q, CONST_SKIRT_GAIN)

261
        torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
262

263
264
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
265
266
267
268
269
270
271
272
    def test_bandreject(self):
        """
        Test biquad bandreject filter, compare to SoX implementation
        """

        CENTRAL_FREQ = 1000
        Q = 0.707

273
        noise_filepath = os.path.join(self.test_dirpath, "assets", "whitenoise.wav")
274
275
276
277
278
279
280
281
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("bandreject", [CENTRAL_FREQ, str(Q) + 'q'])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.bandreject_biquad(waveform, sample_rate, CENTRAL_FREQ, Q)

282
        torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
moto's avatar
moto committed
283

284
285
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
286
287
288
289
290
291
292
293
294
    def test_band_with_noise(self):
        """
        Test biquad band filter with noise mode, compare to SoX implementation
        """

        CENTRAL_FREQ = 1000
        Q = 0.707
        NOISE = True

295
        noise_filepath = os.path.join(self.test_dirpath, "assets", "whitenoise.wav")
296
297
298
299
300
301
302
303
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("band", ["-n", CENTRAL_FREQ, str(Q) + 'q'])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.band_biquad(waveform, sample_rate, CENTRAL_FREQ, Q, NOISE)

304
        torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
305

306
307
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
308
309
310
311
312
313
314
315
316
    def test_band_without_noise(self):
        """
        Test biquad band filter without noise mode, compare to SoX implementation
        """

        CENTRAL_FREQ = 1000
        Q = 0.707
        NOISE = False

317
        noise_filepath = os.path.join(self.test_dirpath, "assets", "whitenoise.wav")
318
319
320
321
322
323
324
325
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("band", [CENTRAL_FREQ, str(Q) + 'q'])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.band_biquad(waveform, sample_rate, CENTRAL_FREQ, Q, NOISE)

326
        torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
327

328
329
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
330
331
332
333
334
335
336
337
338
    def test_treble(self):
        """
        Test biquad treble filter, compare to SoX implementation
        """

        CENTRAL_FREQ = 1000
        Q = 0.707
        GAIN = 40

339
        noise_filepath = os.path.join(self.test_dirpath, "assets", "whitenoise.wav")
340
341
342
343
344
345
346
347
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("treble", [GAIN, CENTRAL_FREQ, str(Q) + 'q'])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.treble_biquad(waveform, sample_rate, GAIN, CENTRAL_FREQ, Q)

348
        torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
349

350
351
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
352
353
354
355
356
    def test_deemph(self):
        """
        Test biquad deemph filter, compare to SoX implementation
        """

357
        noise_filepath = os.path.join(self.test_dirpath, "assets", "whitenoise.wav")
358
359
360
361
362
363
364
365
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("deemph")
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.deemph_biquad(waveform, sample_rate)

366
        torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
367

368
369
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
370
371
372
373
374
    def test_riaa(self):
        """
        Test biquad riaa filter, compare to SoX implementation
        """

375
        noise_filepath = os.path.join(self.test_dirpath, "assets", "whitenoise.wav")
376
377
378
379
380
381
382
383
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("riaa")
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.riaa_biquad(waveform, sample_rate)

384
        torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
385

386
387
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
xinyang0's avatar
xinyang0 committed
388
389
390
391
392
393
394
395
396
    def test_equalizer(self):
        """
        Test biquad peaking equalizer filter, compare to SoX implementation
        """

        CENTER_FREQ = 300
        Q = 0.707
        GAIN = 1

397
        noise_filepath = os.path.join(self.test_dirpath, "assets", "whitenoise.wav")
xinyang0's avatar
xinyang0 committed
398
399
400
401
402
403
404
405
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(noise_filepath)
        E.append_effect_to_chain("equalizer", [CENTER_FREQ, Q, GAIN])
        sox_output_waveform, sr = E.sox_build_flow_effects()

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.equalizer_biquad(waveform, sample_rate, CENTER_FREQ, GAIN, Q)

406
        torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
xinyang0's avatar
xinyang0 committed
407

408
409
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
410
411
    def test_perf_biquad_filtering(self):

412
        fn_sine = os.path.join(self.test_dirpath, "assets", "whitenoise.wav")
413
414
415
416
417
418
419
420
421
422
423
424

        b0 = 0.4
        b1 = 0.2
        b2 = 0.9
        a0 = 0.7
        a1 = 0.2
        a2 = 0.6

        # SoX method
        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(fn_sine)
        E.append_effect_to_chain("biquad", [b0, b1, b2, a0, a1, a2])
moto's avatar
moto committed
425
        waveform_sox_out, _ = E.sox_build_flow_effects()
426

moto's avatar
moto committed
427
        waveform, _ = torchaudio.load(fn_sine, normalization=True)
428
429
430
431
        waveform_lfilter_out = F.lfilter(
            waveform, torch.tensor([a0, a1, a2]), torch.tensor([b0, b1, b2])
        )

432
        torch.testing.assert_allclose(waveform_lfilter_out, waveform_sox_out, atol=1e-4, rtol=1e-5)
433
434
435
436


if __name__ == "__main__":
    unittest.main()