import unittest

import torch
import torchaudio
import torchaudio.functional as F
import torchaudio.transforms as T

from . import common_utils


@common_utils.skipIfNoSoxBackend
class TestFunctionalFiltering(common_utils.TempDirMixin, common_utils.TorchaudioTestCase):
    """Compare torchaudio effects against the equivalent SoX effects.

    Every test runs an effect through ``torchaudio.sox_effects.SoxEffectsChain``
    and through the matching ``torchaudio.functional`` / ``torchaudio.transforms``
    call, then asserts that both waveforms agree within a tolerance.
    """
    backend = 'sox'

    def setUp(self):
        """Create the white-noise fixture shared by the filter tests."""
        # 1. Create int16 signal to save as PCM wav
        # 2. Write to temp file
        # 3. Load temp file into tensor to reuse in downstream tests
        #    Prefer to use common_utils.load_wav() but this implementation does
        #    not match torchaudio.load and errors on downstream tests
        super().setUp()

        self.NOISE_SAMPLE_RATE = 44100  # N.B. 44.1 kHz required by SoX deemph effect
        noise_waveform_as_int = common_utils.get_whitenoise(
            sample_rate=self.NOISE_SAMPLE_RATE, duration=5, dtype=torch.int16, scale_factor=0.9,
        )
        self.noise_filepath = self.get_temp_path("whitenoise.wav")
        common_utils.save_wav(
            self.noise_filepath, noise_waveform_as_int, self.NOISE_SAMPLE_RATE
        )
        self.noise_waveform, _ = torchaudio.load(self.noise_filepath, normalization=True)

    def _sox_effect_on_noise(self, effect, args=None):
        """Run a single SoX effect over the white-noise fixture file.

        Args:
            effect: Name of the SoX effect, e.g. ``"lowpass"``.
            args: List of arguments for the effect, or ``None`` to append the
                effect without any arguments.

        Returns:
            The waveform returned by the SoX effects chain; the sample rate
            returned alongside it is discarded.
        """
        chain = torchaudio.sox_effects.SoxEffectsChain()
        chain.set_input_file(self.noise_filepath)
        if args is None:
            chain.append_effect_to_chain(effect)
        else:
            chain.append_effect_to_chain(effect, args)
        waveform, _ = chain.sox_build_flow_effects()
        return waveform

    def test_gain(self):
        """
        Test gain, compare to SoX implementation
        """
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
        waveform, _ = common_utils.load_wav(test_filepath)

        waveform_gain = F.gain(waveform, 3)
        # This used to be ``assertTrue(peak, 1.)``, which passes ``1.`` as the
        # failure *message* and therefore never compared the peak to anything.
        # NOTE(review): the intended check is assumed to be "the gained signal
        # stays within full scale" -- confirm against the asset's peak level.
        self.assertLessEqual(waveform_gain.abs().max().item(), 1.)

        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(test_filepath)
        E.append_effect_to_chain("gain", [3])
        sox_gain_waveform = E.sox_build_flow_effects()[0]

        self.assertEqual(waveform_gain, sox_gain_waveform, atol=1e-04, rtol=1e-5)

    def test_dither(self):
        """
        Test dither with and without noise shaping, compare to SoX implementation
        """
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
        waveform, _ = common_utils.load_wav(test_filepath)

        waveform_dithered = F.dither(waveform)
        waveform_dithered_noiseshaped = F.dither(waveform, noise_shaping=True)

        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(test_filepath)
        E.append_effect_to_chain("dither", [])
        sox_dither_waveform = E.sox_build_flow_effects()[0]

        self.assertEqual(waveform_dithered, sox_dither_waveform, atol=1e-04, rtol=1e-5)

        # Reuse the same chain (and input file) for the noise-shaped variant.
        E.clear_chain()
        E.append_effect_to_chain("dither", ["-s"])
        sox_dither_waveform_ns = E.sox_build_flow_effects()[0]

        self.assertEqual(waveform_dithered_noiseshaped, sox_dither_waveform_ns, atol=1e-02, rtol=1e-5)

    def test_vctk_transform_pipeline(self):
        """
        Test resample followed by noise-shaped dither, compare to the SoX effect chain
        """
        test_filepath_vctk = common_utils.get_asset_path('VCTK-Corpus', 'wav48', 'p224', 'p224_002.wav')
        wf_vctk, sr_vctk = common_utils.load_wav(test_filepath_vctk)

        # rate
        sample = T.Resample(sr_vctk, 16000, resampling_method='sinc_interpolation')
        wf_vctk = sample(wf_vctk)
        # dither
        wf_vctk = F.dither(wf_vctk, noise_shaping=True)

        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(test_filepath_vctk)
        E.append_effect_to_chain("gain", ["-h"])
        E.append_effect_to_chain("channels", [1])
        E.append_effect_to_chain("rate", [16000])
        E.append_effect_to_chain("gain", ["-rh"])
        E.append_effect_to_chain("dither", ["-s"])
        wf_vctk_sox = E.sox_build_flow_effects()[0]

        self.assertEqual(wf_vctk, wf_vctk_sox, rtol=1e-03, atol=1e-03)

    def test_lowpass(self):
        """
        Test biquad lowpass filter, compare to SoX implementation
        """
        cutoff_freq = 3000

        sox_output_waveform = self._sox_effect_on_noise("lowpass", [cutoff_freq])
        output_waveform = F.lowpass_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE, cutoff_freq)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_highpass(self):
        """
        Test biquad highpass filter, compare to SoX implementation
        """
        cutoff_freq = 2000

        sox_output_waveform = self._sox_effect_on_noise("highpass", [cutoff_freq])
        output_waveform = F.highpass_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE, cutoff_freq)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1.5e-3, rtol=1e-5)

    def test_allpass(self):
        """
        Test biquad allpass filter, compare to SoX implementation
        """
        central_freq = 1000
        q = 0.707

        sox_output_waveform = self._sox_effect_on_noise("allpass", [central_freq, str(q) + 'q'])
        output_waveform = F.allpass_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE, central_freq, q)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_bandpass_with_csg(self):
        """
        Test biquad bandpass filter with constant skirt gain, compare to SoX implementation
        """
        central_freq = 1000
        q = 0.707
        const_skirt_gain = True

        sox_output_waveform = self._sox_effect_on_noise("bandpass", ["-c", central_freq, str(q) + 'q'])
        output_waveform = F.bandpass_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE,
                                            central_freq, q, const_skirt_gain)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_bandpass_without_csg(self):
        """
        Test biquad bandpass filter without constant skirt gain, compare to SoX implementation
        """
        central_freq = 1000
        q = 0.707
        const_skirt_gain = False

        sox_output_waveform = self._sox_effect_on_noise("bandpass", [central_freq, str(q) + 'q'])
        output_waveform = F.bandpass_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE,
                                            central_freq, q, const_skirt_gain)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_bandreject(self):
        """
        Test biquad bandreject filter, compare to SoX implementation
        """
        central_freq = 1000
        q = 0.707

        sox_output_waveform = self._sox_effect_on_noise("bandreject", [central_freq, str(q) + 'q'])
        output_waveform = F.bandreject_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE,
                                              central_freq, q)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_band_with_noise(self):
        """
        Test biquad band filter with noise mode, compare to SoX implementation
        """
        central_freq = 1000
        q = 0.707
        noise = True

        sox_output_waveform = self._sox_effect_on_noise("band", ["-n", central_freq, str(q) + 'q'])
        output_waveform = F.band_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE, central_freq, q, noise)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_band_without_noise(self):
        """
        Test biquad band filter without noise mode, compare to SoX implementation
        """
        central_freq = 1000
        q = 0.707
        noise = False

        sox_output_waveform = self._sox_effect_on_noise("band", [central_freq, str(q) + 'q'])
        output_waveform = F.band_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE, central_freq, q, noise)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_treble(self):
        """
        Test biquad treble filter, compare to SoX implementation
        """
        central_freq = 1000
        q = 0.707
        gain = 40

        sox_output_waveform = self._sox_effect_on_noise("treble", [gain, central_freq, str(q) + 'q'])
        output_waveform = F.treble_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE, gain, central_freq, q)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_bass(self):
        """
        Test biquad bass filter, compare to SoX implementation
        """
        central_freq = 1000
        q = 0.707
        gain = 40

        sox_output_waveform = self._sox_effect_on_noise("bass", [gain, central_freq, str(q) + 'q'])
        output_waveform = F.bass_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE, gain, central_freq, q)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1.5e-4, rtol=1e-5)

    def test_deemph(self):
        """
        Test biquad deemph filter, compare to SoX implementation
        """
        sox_output_waveform = self._sox_effect_on_noise("deemph")
        output_waveform = F.deemph_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_riaa(self):
        """
        Test biquad riaa filter, compare to SoX implementation
        """
        sox_output_waveform = self._sox_effect_on_noise("riaa")
        output_waveform = F.riaa_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_contrast(self):
        """
        Test contrast effect, compare to SoX implementation
        """
        enhancement_amount = 80.

        sox_output_waveform = self._sox_effect_on_noise("contrast", [enhancement_amount])
        output_waveform = F.contrast(self.noise_waveform, enhancement_amount)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_dcshift_with_limiter(self):
        """
        Test dcshift effect with a limiter gain, compare to SoX implementation
        """
        shift = 0.5
        limiter_gain = 0.05

        sox_output_waveform = self._sox_effect_on_noise("dcshift", [shift, limiter_gain])
        output_waveform = F.dcshift(self.noise_waveform, shift, limiter_gain)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_dcshift_without_limiter(self):
        """
        Test dcshift effect without a limiter gain, compare to SoX implementation
        """
        shift = 0.6

        sox_output_waveform = self._sox_effect_on_noise("dcshift", [shift])
        output_waveform = F.dcshift(self.noise_waveform, shift)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_overdrive(self):
        """
        Test overdrive effect, compare to SoX implementation
        """
        gain = 30
        colour = 40

        sox_output_waveform = self._sox_effect_on_noise("overdrive", [gain, colour])
        output_waveform = F.overdrive(self.noise_waveform, gain, colour)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_phaser_sine(self):
        """
        Test phaser effect with sine modulation, compare to SoX implementation
        """
        gain_in = 0.5
        gain_out = 0.8
        delay_ms = 2.0
        decay = 0.4
        speed = 0.5

        sox_output_waveform = self._sox_effect_on_noise(
            "phaser", [gain_in, gain_out, delay_ms, decay, speed, "-s"])
        output_waveform = F.phaser(self.noise_waveform, self.NOISE_SAMPLE_RATE,
                                   gain_in, gain_out, delay_ms, decay, speed, sinusoidal=True)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_phaser_triangle(self):
        """
        Test phaser effect with triangle modulation, compare to SoX implementation
        """
        gain_in = 0.5
        gain_out = 0.8
        delay_ms = 2.0
        decay = 0.4
        speed = 0.5

        sox_output_waveform = self._sox_effect_on_noise(
            "phaser", [gain_in, gain_out, delay_ms, decay, speed, "-t"])
        output_waveform = F.phaser(self.noise_waveform, self.NOISE_SAMPLE_RATE,
                                   gain_in, gain_out, delay_ms, decay, speed, sinusoidal=False)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_flanger_triangle_linear(self):
        """
        Test flanger effect with triangle modulation and linear interpolation, compare to SoX implementation
        """
        delay = 0.6
        depth = 0.87
        regen = 3.0
        width = 0.9
        speed = 0.5
        phase = 30

        sox_output_waveform = self._sox_effect_on_noise(
            "flanger", [delay, depth, regen, width, speed, "triangle", phase, "linear"])
        output_waveform = F.flanger(self.noise_waveform, self.NOISE_SAMPLE_RATE, delay, depth, regen,
                                    width, speed, phase, modulation='triangular', interpolation='linear')

        # Same assertion helper as the rest of the class (was torch.testing.assert_allclose).
        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_flanger_triangle_quad(self):
        """
        Test flanger effect with triangle modulation and quadratic interpolation, compare to SoX implementation
        """
        delay = 0.8
        depth = 0.88
        regen = 3.0
        width = 0.4
        speed = 0.5
        phase = 40

        sox_output_waveform = self._sox_effect_on_noise(
            "flanger", [delay, depth, regen, width, speed, "triangle", phase, "quadratic"])
        output_waveform = F.flanger(self.noise_waveform, self.NOISE_SAMPLE_RATE, delay, depth,
                                    regen, width, speed, phase, modulation='triangular', interpolation='quadratic')

        # Same assertion helper as the rest of the class (was torch.testing.assert_allclose).
        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_flanger_sine_linear(self):
        """
        Test flanger effect with sine modulation and linear interpolation, compare to SoX implementation
        """
        delay = 0.8
        depth = 0.88
        regen = 3.0
        width = 0.23
        speed = 1.3
        phase = 60

        sox_output_waveform = self._sox_effect_on_noise(
            "flanger", [delay, depth, regen, width, speed, "sine", phase, "linear"])
        output_waveform = F.flanger(self.noise_waveform, self.NOISE_SAMPLE_RATE, delay, depth,
                                    regen, width, speed, phase, modulation='sinusoidal', interpolation='linear')

        # Same assertion helper as the rest of the class (was torch.testing.assert_allclose).
        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_flanger_sine_quad(self):
        """
        Test flanger effect with sine modulation and quadratic interpolation, compare to SoX implementation
        """
        delay = 0.9
        depth = 0.9
        regen = 4.0
        width = 0.23
        speed = 1.3
        phase = 25

        sox_output_waveform = self._sox_effect_on_noise(
            "flanger", [delay, depth, regen, width, speed, "sine", phase, "quadratic"])
        output_waveform = F.flanger(self.noise_waveform, self.NOISE_SAMPLE_RATE, delay, depth,
                                    regen, width, speed, phase, modulation='sinusoidal', interpolation='quadratic')

        # Same assertion helper as the rest of the class (was torch.testing.assert_allclose).
        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_equalizer(self):
        """
        Test biquad peaking equalizer filter, compare to SoX implementation
        """
        center_freq = 300
        q = 0.707
        gain = 1

        # SoX takes (frequency, width, gain); F.equalizer_biquad takes (center_freq, gain, Q).
        sox_output_waveform = self._sox_effect_on_noise("equalizer", [center_freq, q, gain])
        output_waveform = F.equalizer_biquad(self.noise_waveform, self.NOISE_SAMPLE_RATE, center_freq, gain, q)

        self.assertEqual(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

    def test_perf_biquad_filtering(self):
        """
        Test F.lfilter with biquad coefficients, compare to the SoX biquad effect
        """
        b0 = 0.4
        b1 = 0.2
        b2 = 0.9
        a0 = 0.7
        a1 = 0.2
        a2 = 0.6

        # SoX method
        waveform_sox_out = self._sox_effect_on_noise("biquad", [b0, b1, b2, a0, a1, a2])

        # lfilter receives the denominator (a) coefficients before the numerator (b) ones.
        waveform_lfilter_out = F.lfilter(
            self.noise_waveform, torch.tensor([a0, a1, a2]), torch.tensor([b0, b1, b2])
        )

        self.assertEqual(waveform_lfilter_out, waveform_sox_out, atol=1e-4, rtol=1e-5)


# Run the tests through unittest when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()