import torch
import torchaudio.functional as F
import torchaudio.transforms as T
from parameterized import parameterized

from torchaudio_unittest.common_utils import (
    skipIfNoSoxBackend,
    skipIfNoExec,
    TempDirMixin,
    TorchaudioTestCase,
    get_asset_path,
    sox_utils,
    load_wav,
    save_wav,
    get_whitenoise,
)


@skipIfNoSoxBackend
@skipIfNoExec('sox')
class TestFunctionalFiltering(TempDirMixin, TorchaudioTestCase):
    """Check torchaudio effects against the same effect applied via ``sox_utils``.

    Every test computes ``result`` with a ``torchaudio.functional`` function or a
    ``torchaudio.transforms`` module, then asks ``sox_utils.run_sox_effect`` to
    apply the equivalent effect to the same audio file and compares both outputs
    within a tolerance.
    """

    def run_sox_effect(self, input_file, effect):
        """Apply ``effect`` to ``input_file`` and load the processed audio.

        Args:
            input_file: Path of the audio file handed to ``sox_utils.run_sox_effect``.
            effect: Sequence of effect arguments; each item is converted with
                ``str`` so callers may mix strings and numbers.

        Returns:
            The value of ``load_wav`` for the generated file. Callers in this
            class unpack it as a 2-tuple whose first item is the waveform.
        """
        output_file = self.get_temp_path('expected.wav')
        sox_utils.run_sox_effect(input_file, output_file, [str(e) for e in effect])
        return load_wav(output_file)

    def assert_sox_effect(self, result, input_path, effects, atol=1e-04, rtol=1e-5):
        """Assert that ``result`` matches the reference produced from ``input_path``.

        Args:
            result: Output of the torchaudio implementation under test.
            input_path: Audio file that the reference effect is applied to.
            effects: Effect arguments forwarded to :meth:`run_sox_effect`.
            atol: Absolute tolerance forwarded to ``assertEqual``.
            rtol: Relative tolerance forwarded to ``assertEqual``.
        """
        expected, _ = self.run_sox_effect(input_path, effects)
        self.assertEqual(result, expected, atol=atol, rtol=rtol)

    def get_whitenoise(self, sample_rate=8000):
        """Generate white noise, save it to a temporary wav file and return both.

        The module-level ``get_whitenoise`` helper (same name as this method) is
        what actually generates the signal.

        Args:
            sample_rate: Sample rate used for generating and saving the noise.

        Returns:
            Tuple ``(noise, path)``: the generated data and the file it was saved to.
        """
        noise = get_whitenoise(
            sample_rate=sample_rate, duration=3, scale_factor=0.9,
        )
        path = self.get_temp_path("whitenoise.wav")
        save_wav(path, noise, sample_rate)
        return noise, path

    def test_gain(self):
        """``F.gain`` vs. the ``gain`` effect."""
        path = get_asset_path('steam-train-whistle-daniel_simon.wav')
        data, _ = load_wav(path)
        result = F.gain(data, 3)
        self.assert_sox_effect(result, path, ['gain', 3])

    def test_dither(self):
        """``F.dither`` with default arguments vs. the ``dither`` effect."""
        path = get_asset_path('steam-train-whistle-daniel_simon.wav')
        data, _ = load_wav(path)
        result = F.dither(data)
        self.assert_sox_effect(result, path, ['dither'])

    def test_dither_noise(self):
        """``F.dither`` with noise shaping vs. ``dither -s`` (looser ``atol``)."""
        path = get_asset_path('steam-train-whistle-daniel_simon.wav')
        data, _ = load_wav(path)
        result = F.dither(data, noise_shaping=True)
        self.assert_sox_effect(result, path, ['dither', '-s'], atol=1.5e-4)

    def test_lowpass(self):
        """``F.lowpass_biquad`` vs. the ``lowpass`` effect."""
        cutoff_freq = 3000
        sample_rate = 8000

        data, path = self.get_whitenoise(sample_rate)
        result = F.lowpass_biquad(data, sample_rate, cutoff_freq)
        self.assert_sox_effect(result, path, ['lowpass', cutoff_freq], atol=1.5e-4)

    def test_highpass(self):
        """``F.highpass_biquad`` vs. the ``highpass`` effect."""
        cutoff_freq = 2000
        sample_rate = 8000

        data, path = self.get_whitenoise(sample_rate)
        result = F.highpass_biquad(data, sample_rate, cutoff_freq)
        self.assert_sox_effect(result, path, ['highpass', cutoff_freq], atol=1.5e-4)

    def test_allpass(self):
        """``F.allpass_biquad`` vs. the ``allpass`` effect."""
        central_freq = 1000
        q = 0.707
        sample_rate = 8000

        data, path = self.get_whitenoise(sample_rate)
        result = F.allpass_biquad(data, sample_rate, central_freq, q)
        self.assert_sox_effect(result, path, ['allpass', central_freq, f'{q}q'])

    def test_bandpass_with_csg(self):
        """``F.bandpass_biquad`` with ``const_skirt_gain=True`` vs. ``bandpass -c``."""
        central_freq = 1000
        q = 0.707
        const_skirt_gain = True
        sample_rate = 8000

        data, path = self.get_whitenoise(sample_rate)
        result = F.bandpass_biquad(data, sample_rate, central_freq, q, const_skirt_gain)
        self.assert_sox_effect(result, path, ['bandpass', '-c', central_freq, f'{q}q'])

    def test_bandpass_without_csg(self):
        """``F.bandpass_biquad`` with ``const_skirt_gain=False`` vs. ``bandpass``."""
        central_freq = 1000
        q = 0.707
        const_skirt_gain = False
        sample_rate = 8000

        data, path = self.get_whitenoise(sample_rate)
        result = F.bandpass_biquad(data, sample_rate, central_freq, q, const_skirt_gain)
        self.assert_sox_effect(result, path, ['bandpass', central_freq, f'{q}q'])

    def test_bandreject(self):
        """``F.bandreject_biquad`` vs. the ``bandreject`` effect."""
        central_freq = 1000
        q = 0.707
        sample_rate = 8000

        data, path = self.get_whitenoise(sample_rate)
        result = F.bandreject_biquad(data, sample_rate, central_freq, q)
        self.assert_sox_effect(result, path, ['bandreject', central_freq, f'{q}q'])

    def test_band_with_noise(self):
        """``F.band_biquad`` with ``noise=True`` vs. ``band -n``."""
        central_freq = 1000
        q = 0.707
        noise = True
        sample_rate = 8000

        data, path = self.get_whitenoise(sample_rate)
        result = F.band_biquad(data, sample_rate, central_freq, q, noise)
        self.assert_sox_effect(result, path, ['band', '-n', central_freq, f'{q}q'])

    def test_band_without_noise(self):
        """``F.band_biquad`` with ``noise=False`` vs. ``band``."""
        central_freq = 1000
        q = 0.707
        noise = False
        sample_rate = 8000

        data, path = self.get_whitenoise(sample_rate)
        result = F.band_biquad(data, sample_rate, central_freq, q, noise)
        self.assert_sox_effect(result, path, ['band', central_freq, f'{q}q'])

    def test_treble(self):
        """``F.treble_biquad`` vs. the ``treble`` effect."""
        central_freq = 1000
        q = 0.707
        gain = 40
        sample_rate = 8000

        data, path = self.get_whitenoise(sample_rate)
        result = F.treble_biquad(data, sample_rate, gain, central_freq, q)
        self.assert_sox_effect(result, path, ['treble', gain, central_freq, f'{q}q'])

    def test_bass(self):
        """``F.bass_biquad`` vs. the ``bass`` effect (looser ``atol``)."""
        central_freq = 1000
        q = 0.707
        gain = 40
        sample_rate = 8000

        data, path = self.get_whitenoise(sample_rate)
        result = F.bass_biquad(data, sample_rate, gain, central_freq, q)
        self.assert_sox_effect(result, path, ['bass', gain, central_freq, f'{q}q'], atol=1.5e-4)

    def test_deemph(self):
        """``F.deemph_biquad`` vs. the ``deemph`` effect, on 44.1 kHz noise."""
        sample_rate = 44100
        data, path = self.get_whitenoise(sample_rate)
        result = F.deemph_biquad(data, sample_rate)
        self.assert_sox_effect(result, path, ['deemph'])

    def test_riaa(self):
        """``F.riaa_biquad`` vs. the ``riaa`` effect, on 44.1 kHz noise."""
        sample_rate = 44100
        data, path = self.get_whitenoise(sample_rate)
        result = F.riaa_biquad(data, sample_rate)
        self.assert_sox_effect(result, path, ['riaa'])

    def test_contrast(self):
        """``F.contrast`` vs. the ``contrast`` effect."""
        enhancement_amount = 80.

        data, path = self.get_whitenoise()
        result = F.contrast(data, enhancement_amount)
        self.assert_sox_effect(result, path, ['contrast', enhancement_amount])

    def test_dcshift_with_limiter(self):
        """``F.dcshift`` with a limiter gain vs. ``dcshift`` with the same arguments."""
        shift = 0.5
        limiter_gain = 0.05

        data, path = self.get_whitenoise()
        result = F.dcshift(data, shift, limiter_gain)
        self.assert_sox_effect(result, path, ['dcshift', shift, limiter_gain])

    def test_dcshift_without_limiter(self):
        """``F.dcshift`` without a limiter gain vs. ``dcshift`` with only the shift."""
        shift = 0.6

        data, path = self.get_whitenoise()
        result = F.dcshift(data, shift)
        self.assert_sox_effect(result, path, ['dcshift', shift])

    def test_overdrive(self):
        """``F.overdrive`` vs. the ``overdrive`` effect."""
        gain = 30
        colour = 40

        data, path = self.get_whitenoise()
        result = F.overdrive(data, gain, colour)
        self.assert_sox_effect(result, path, ['overdrive', gain, colour])

    def test_phaser_sine(self):
        """``F.phaser`` with ``sinusoidal=True`` vs. ``phaser ... -s``."""
        gain_in = 0.5
        gain_out = 0.8
        delay_ms = 2.0
        decay = 0.4
        speed = 0.5
        sample_rate = 8000

        data, path = self.get_whitenoise(sample_rate)
        result = F.phaser(data, sample_rate, gain_in, gain_out, delay_ms, decay, speed, sinusoidal=True)
        self.assert_sox_effect(result, path, ['phaser', gain_in, gain_out, delay_ms, decay, speed, '-s'])

    def test_phaser_triangle(self):
        """``F.phaser`` with ``sinusoidal=False`` vs. ``phaser ... -t``."""
        gain_in = 0.5
        gain_out = 0.8
        delay_ms = 2.0
        decay = 0.4
        speed = 0.5
        sample_rate = 8000

        data, path = self.get_whitenoise(sample_rate)
        result = F.phaser(data, sample_rate, gain_in, gain_out, delay_ms, decay, speed, sinusoidal=False)
        self.assert_sox_effect(result, path, ['phaser', gain_in, gain_out, delay_ms, decay, speed, '-t'])

    def test_flanger_triangle_linear(self):
        """``F.flanger`` (triangular modulation, linear interpolation) vs. ``flanger``."""
        delay = 0.6
        depth = 0.87
        regen = 3.0
        width = 0.9
        speed = 0.5
        phase = 30
        sample_rate = 8000

        data, path = self.get_whitenoise(sample_rate)
        result = F.flanger(
            data, sample_rate, delay, depth, regen, width, speed, phase,
            modulation='triangular', interpolation='linear')
        self.assert_sox_effect(
            result, path, ['flanger', delay, depth, regen, width, speed, 'triangle', phase, 'linear'])

    def test_flanger_triangle_quad(self):
        """``F.flanger`` (triangular modulation, quadratic interpolation) vs. ``flanger``."""
        delay = 0.8
        depth = 0.88
        regen = 3.0
        width = 0.4
        speed = 0.5
        phase = 40
        sample_rate = 8000

        data, path = self.get_whitenoise(sample_rate)
        result = F.flanger(
            data, sample_rate, delay, depth, regen, width, speed, phase,
            modulation='triangular', interpolation='quadratic')
        self.assert_sox_effect(
            result, path, ['flanger', delay, depth, regen, width, speed, 'triangle', phase, 'quadratic'])

    def test_flanger_sine_linear(self):
        """``F.flanger`` (sinusoidal modulation, linear interpolation) vs. ``flanger``."""
        delay = 0.8
        depth = 0.88
        regen = 3.0
        width = 0.23
        speed = 1.3
        phase = 60
        sample_rate = 8000

        data, path = self.get_whitenoise(sample_rate)
        result = F.flanger(
            data, sample_rate, delay, depth, regen, width, speed, phase,
            modulation='sinusoidal', interpolation='linear')
        self.assert_sox_effect(
            result, path, ['flanger', delay, depth, regen, width, speed, 'sine', phase, 'linear'])

    def test_flanger_sine_quad(self):
        """``F.flanger`` (sinusoidal modulation, quadratic interpolation) vs. ``flanger``."""
        delay = 0.9
        depth = 0.9
        regen = 4.0
        width = 0.23
        speed = 1.3
        phase = 25
        sample_rate = 8000

        data, path = self.get_whitenoise(sample_rate)
        result = F.flanger(
            data, sample_rate, delay, depth, regen, width, speed, phase,
            modulation='sinusoidal', interpolation='quadratic')
        self.assert_sox_effect(
            result, path, ['flanger', delay, depth, regen, width, speed, 'sine', phase, 'quadratic'])

    def test_equalizer(self):
        """``F.equalizer_biquad`` vs. the ``equalizer`` effect."""
        center_freq = 300
        q = 0.707
        gain = 1
        sample_rate = 8000

        data, path = self.get_whitenoise(sample_rate)
        result = F.equalizer_biquad(data, sample_rate, center_freq, gain, q)
        # Spell out the width unit (Q-factor) like every other biquad test here,
        # instead of relying on the effect's default width unit.
        self.assert_sox_effect(result, path, ['equalizer', center_freq, f'{q}q', gain])

    def test_perf_biquad_filtering(self):
        """``F.lfilter`` with explicit coefficients vs. the ``biquad`` effect."""
        b0 = 0.4
        b1 = 0.2
        b2 = 0.9
        a0 = 0.7
        a1 = 0.2
        a2 = 0.6

        data, path = self.get_whitenoise()
        # F.lfilter receives the a-coefficients first, while the effect
        # arguments list b0 b1 b2 before a0 a1 a2.
        result = F.lfilter(data, torch.tensor([a0, a1, a2]), torch.tensor([b0, b1, b2]))
        self.assert_sox_effect(result, path, ['biquad', b0, b1, b2, a0, a1, a2])

    @parameterized.expand([
        ('q', 'quarter_sine'),
        ('h', 'half_sine'),
        ('t', 'linear'),
    ])
    def test_fade(self, fade_shape_sox, fade_shape):
        """``T.Fade`` vs. the ``fade`` effect for each parameterized shape.

        Args:
            fade_shape_sox: Shape flag passed to the ``fade`` effect.
            fade_shape: Corresponding ``fade_shape`` argument of ``T.Fade``.
        """
        sample_rate = 44100
        # Fade lengths are in samples: one sample-rate's worth at each end, to
        # line up with the '1' fade lengths handed to the effect below.
        fade_in_len = fade_out_len = sample_rate
        data, path = self.get_whitenoise(sample_rate=sample_rate)
        result = T.Fade(fade_in_len, fade_out_len, fade_shape)(data)
        self.assert_sox_effect(result, path, ['fade', fade_shape_sox, '1', '0', '1'])

    @parameterized.expand([
        ('amplitude', 1.1),
        ('db', 2),
        ('power', 2),
    ])
    def test_vol(self, gain_type, gain):
        """``T.Vol`` vs. the ``vol`` effect for each parameterized gain type.

        Args:
            gain_type: Gain type, passed to both ``T.Vol`` and the effect.
            gain: Gain value, passed to both ``T.Vol`` and the effect.
        """
        data, path = self.get_whitenoise()
        result = T.Vol(gain, gain_type)(data)
        self.assert_sox_effect(result, path, ['vol', f'{gain}', gain_type])

    @parameterized.expand(['vad-go-stereo-44100.wav', 'vad-go-mono-32000.wav'])
    def test_vad(self, filename):
        """``T.Vad`` vs. the ``vad`` effect on the given asset file.

        Args:
            filename: Name of the asset resolved through ``get_asset_path``.
        """
        path = get_asset_path(filename)
        data, sample_rate = load_wav(path)
        result = T.Vad(sample_rate)(data)
        self.assert_sox_effect(result, path, ['vad'])