test_sox_compatibility.py 18.1 KB
Newer Older
1
2
import unittest

3
import torch
4
from torch.testing._internal.common_utils import TestCase
5
6
import torchaudio
import torchaudio.functional as F
7
8
import torchaudio.transforms as T

9
10
import common_utils
from common_utils import AudioBackendScope, BACKENDS
11
12


13
class TestFunctionalFiltering(TestCase):
    """Compare ``torchaudio.functional`` effects with their SoX counterparts.

    Every test processes the same audio asset twice: once through a
    ``torchaudio.sox_effects.SoxEffectsChain`` and once through the matching
    ``torchaudio.functional`` routine, and then asserts that the two
    waveforms agree within the tolerance given in the test.
    """

    @staticmethod
    def _run_sox_effect(filepath, effect, *effect_args):
        """Apply a single SoX effect to ``filepath`` and return the waveform.

        Args:
            filepath: Path of the audio file used as the chain input.
            effect: Name of the SoX effect to append to the chain.
            *effect_args: Forwarded unchanged to ``append_effect_to_chain``
                after the effect name, so passing nothing results in the
                one-argument call ``append_effect_to_chain(effect)``.

        Returns:
            The first element of ``sox_build_flow_effects()`` (the waveform).
        """
        chain = torchaudio.sox_effects.SoxEffectsChain()
        chain.set_input_file(filepath)
        chain.append_effect_to_chain(effect, *effect_args)
        return chain.sox_build_flow_effects()[0]

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_gain(self):
        """
        Test gain, compare to SoX implementation
        """
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
        waveform, _ = torchaudio.load(test_filepath)

        waveform_gain = F.gain(waveform, 3)
        # assertTrue(peak, 1.) would treat ``1.`` as the failure message and
        # accept any non-zero peak; compare against full scale explicitly.
        self.assertLessEqual(waveform_gain.abs().max().item(), 1.)

        sox_gain_waveform = self._run_sox_effect(test_filepath, "gain", [3])

        self.assertEqual(waveform_gain, sox_gain_waveform, atol=1e-04, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_dither(self):
        """
        Test dither with and without noise shaping, compare to SoX implementation
        """
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
        waveform, _ = torchaudio.load(test_filepath)

        waveform_dithered = F.dither(waveform)
        waveform_dithered_noiseshaped = F.dither(waveform, noise_shaping=True)

        chain = torchaudio.sox_effects.SoxEffectsChain()
        chain.set_input_file(test_filepath)
        chain.append_effect_to_chain("dither", [])
        sox_dither_waveform = chain.sox_build_flow_effects()[0]

        self.assertEqual(waveform_dithered, sox_dither_waveform, atol=1e-04, rtol=1e-5)

        # The same chain object is reused for the noise-shaped variant, so the
        # previous effect has to be cleared first.
        chain.clear_chain()
        chain.append_effect_to_chain("dither", ["-s"])
        sox_dither_waveform_ns = chain.sox_build_flow_effects()[0]

        self.assertEqual(waveform_dithered_noiseshaped, sox_dither_waveform_ns, atol=1e-02, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_vctk_transform_pipeline(self):
        """
        Test resample followed by noise-shaped dither, compare to a SoX effect chain
        """
        test_filepath_vctk = common_utils.get_asset_path('VCTK-Corpus', 'wav48', 'p224', 'p224_002.wav')
        wf_vctk, sr_vctk = torchaudio.load(test_filepath_vctk)

        # rate
        sample = T.Resample(sr_vctk, 16000, resampling_method='sinc_interpolation')
        wf_vctk = sample(wf_vctk)
        # dither
        wf_vctk = F.dither(wf_vctk, noise_shaping=True)

        # Effects are appended in this exact order.
        sox_effects = (
            ("gain", ["-h"]),
            ("channels", [1]),
            ("rate", [16000]),
            ("gain", ["-rh"]),
            ("dither", ["-s"]),
        )
        chain = torchaudio.sox_effects.SoxEffectsChain()
        chain.set_input_file(test_filepath_vctk)
        for effect_name, effect_args in sox_effects:
            chain.append_effect_to_chain(effect_name, effect_args)
        wf_vctk_sox = chain.sox_build_flow_effects()[0]

        self.assertEqual(wf_vctk, wf_vctk_sox, rtol=1e-03, atol=1e-03)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_lowpass(self):
        """
        Test biquad lowpass filter, compare to SoX implementation
        """
        cutoff_freq = 3000

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        sox_output_waveform = self._run_sox_effect(noise_filepath, "lowpass", [cutoff_freq])

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.lowpass_biquad(waveform, sample_rate, cutoff_freq)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_highpass(self):
        """
        Test biquad highpass filter, compare to SoX implementation
        """
        cutoff_freq = 2000

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        sox_output_waveform = self._run_sox_effect(noise_filepath, "highpass", [cutoff_freq])

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.highpass_biquad(waveform, sample_rate, cutoff_freq)

        # TODO: this fails at the 1e-4 level, debug why
        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-3, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_allpass(self):
        """
        Test biquad allpass filter, compare to SoX implementation
        """
        central_freq = 1000
        q = 0.707

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        sox_output_waveform = self._run_sox_effect(
            noise_filepath, "allpass", [central_freq, str(q) + 'q'])

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.allpass_biquad(waveform, sample_rate, central_freq, q)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_bandpass_with_csg(self):
        """
        Test biquad bandpass filter with constant skirt gain, compare to SoX implementation
        """
        central_freq = 1000
        q = 0.707
        const_skirt_gain = True

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        sox_output_waveform = self._run_sox_effect(
            noise_filepath, "bandpass", ["-c", central_freq, str(q) + 'q'])

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.bandpass_biquad(waveform, sample_rate, central_freq, q, const_skirt_gain)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_bandpass_without_csg(self):
        """
        Test biquad bandpass filter without constant skirt gain, compare to SoX implementation
        """
        central_freq = 1000
        q = 0.707
        const_skirt_gain = False

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        sox_output_waveform = self._run_sox_effect(
            noise_filepath, "bandpass", [central_freq, str(q) + 'q'])

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.bandpass_biquad(waveform, sample_rate, central_freq, q, const_skirt_gain)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_bandreject(self):
        """
        Test biquad bandreject filter, compare to SoX implementation
        """
        central_freq = 1000
        q = 0.707

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        sox_output_waveform = self._run_sox_effect(
            noise_filepath, "bandreject", [central_freq, str(q) + 'q'])

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.bandreject_biquad(waveform, sample_rate, central_freq, q)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_band_with_noise(self):
        """
        Test biquad band filter with noise mode, compare to SoX implementation
        """
        central_freq = 1000
        q = 0.707
        noise = True

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        sox_output_waveform = self._run_sox_effect(
            noise_filepath, "band", ["-n", central_freq, str(q) + 'q'])

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.band_biquad(waveform, sample_rate, central_freq, q, noise)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_band_without_noise(self):
        """
        Test biquad band filter without noise mode, compare to SoX implementation
        """
        central_freq = 1000
        q = 0.707
        noise = False

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        sox_output_waveform = self._run_sox_effect(
            noise_filepath, "band", [central_freq, str(q) + 'q'])

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.band_biquad(waveform, sample_rate, central_freq, q, noise)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_treble(self):
        """
        Test biquad treble filter, compare to SoX implementation
        """
        central_freq = 1000
        q = 0.707
        gain = 40

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        sox_output_waveform = self._run_sox_effect(
            noise_filepath, "treble", [gain, central_freq, str(q) + 'q'])

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.treble_biquad(waveform, sample_rate, gain, central_freq, q)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_deemph(self):
        """
        Test biquad deemph filter, compare to SoX implementation
        """
        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        # No argument list is passed for this effect.
        sox_output_waveform = self._run_sox_effect(noise_filepath, "deemph")

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.deemph_biquad(waveform, sample_rate)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_riaa(self):
        """
        Test biquad riaa filter, compare to SoX implementation
        """
        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        # No argument list is passed for this effect.
        sox_output_waveform = self._run_sox_effect(noise_filepath, "riaa")

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.riaa_biquad(waveform, sample_rate)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_contrast(self):
        """
        Test contrast effect, compare to SoX implementation
        """
        enhancement_amount = 80.

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        sox_output_waveform = self._run_sox_effect(
            noise_filepath, "contrast", [enhancement_amount])

        waveform, _ = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.contrast(waveform, enhancement_amount)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_dcshift_with_limiter(self):
        """
        Test dcshift effect with a limiter gain, compare to SoX implementation
        """
        shift = 0.5
        limiter_gain = 0.05

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        sox_output_waveform = self._run_sox_effect(
            noise_filepath, "dcshift", [shift, limiter_gain])

        waveform, _ = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.dcshift(waveform, shift, limiter_gain)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_dcshift_without_limiter(self):
        """
        Test dcshift effect without a limiter gain, compare to SoX implementation
        """
        shift = 0.6

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        sox_output_waveform = self._run_sox_effect(noise_filepath, "dcshift", [shift])

        waveform, _ = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.dcshift(waveform, shift)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_overdrive(self):
        """
        Test overdrive effect, compare to SoX implementation
        """
        gain = 30
        colour = 40

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        sox_output_waveform = self._run_sox_effect(
            noise_filepath, "overdrive", [gain, colour])

        waveform, _ = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.overdrive(waveform, gain, colour)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_phaser_sine(self):
        """
        Test phaser effect with sine modulation, compare to SoX implementation
        """
        gain_in = 0.5
        gain_out = 0.8
        delay_ms = 2.0
        decay = 0.4
        speed = 0.5

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        sox_output_waveform = self._run_sox_effect(
            noise_filepath, "phaser", [gain_in, gain_out, delay_ms, decay, speed, "-s"])

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.phaser(
            waveform, sample_rate, gain_in, gain_out, delay_ms, decay, speed, sinusoidal=True)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_phaser_triangle(self):
        """
        Test phaser effect with triangle modulation, compare to SoX implementation
        """
        gain_in = 0.5
        gain_out = 0.8
        delay_ms = 2.0
        decay = 0.4
        speed = 0.5

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        sox_output_waveform = self._run_sox_effect(
            noise_filepath, "phaser", [gain_in, gain_out, delay_ms, decay, speed, "-t"])

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.phaser(
            waveform, sample_rate, gain_in, gain_out, delay_ms, decay, speed, sinusoidal=False)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_equalizer(self):
        """
        Test biquad peaking equalizer filter, compare to SoX implementation
        """
        center_freq = 300
        q = 0.707
        gain = 1

        noise_filepath = common_utils.get_asset_path('whitenoise.wav')
        # Argument order differs between the two APIs: the SoX effect receives
        # (freq, q, gain) while equalizer_biquad is called with (freq, gain, q).
        sox_output_waveform = self._run_sox_effect(
            noise_filepath, "equalizer", [center_freq, q, gain])

        waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
        output_waveform = F.equalizer_biquad(waveform, sample_rate, center_freq, gain, q)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_perf_biquad_filtering(self):
        """
        Test lfilter with explicit biquad coefficients, compare to SoX biquad effect
        """
        noise_filepath = common_utils.get_asset_path('whitenoise.wav')

        b0 = 0.4
        b1 = 0.2
        b2 = 0.9
        a0 = 0.7
        a1 = 0.2
        a2 = 0.6

        # SoX method
        waveform_sox_out = self._run_sox_effect(
            noise_filepath, "biquad", [b0, b1, b2, a0, a1, a2])

        # lfilter is called with the a-coefficients before the b-coefficients.
        waveform, _ = torchaudio.load(noise_filepath, normalization=True)
        waveform_lfilter_out = F.lfilter(
            waveform, torch.tensor([a0, a1, a2]), torch.tensor([b0, b1, b2])
        )

        self.assertEqual(waveform_lfilter_out, waveform_sox_out, atol=1e-4, rtol=1e-5)
469
470
471
472


# Run the test suite through unittest when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()