import unittest

import torch
import torchaudio.functional as F
import torchaudio.transforms as T
from parameterized import parameterized

from .common_utils import (
    skipIfNoSoxBackend,
    skipIfNoExec,
    TempDirMixin,
    TorchaudioTestCase,
    get_asset_path,
    sox_utils,
    load_wav,
    save_wav,
    get_whitenoise,
)


@skipIfNoSoxBackend
@skipIfNoExec('sox')
class TestFunctionalFiltering(TempDirMixin, TorchaudioTestCase):
    """Check torchaudio effects against output from ``sox_utils.run_sox_effect``.

    Each test computes a result with ``torchaudio.functional`` (``F``) or
    ``torchaudio.transforms`` (``T``), runs the corresponding sox effect on the
    same input file, and asserts that both waveforms agree within tolerance.
    """

    def run_sox_effect(self, input_file, effect):
        """Run ``effect`` on ``input_file`` through sox and load the output.

        Args:
            input_file: Path of the wav file handed to sox.
            effect: Sequence of effect arguments; every item is converted
                with ``str`` before being passed on.

        Returns:
            Whatever ``load_wav`` returns for the temporary ``expected.wav``.
        """
        output_file = self.get_temp_path('expected.wav')
        sox_args = list(map(str, effect))
        sox_utils.run_sox_effect(input_file, output_file, sox_args)
        return load_wav(output_file)

    def assert_sox_effect(self, result, input_path, effects, atol=1e-04, rtol=1e-5):
        """Assert that ``result`` matches sox applying ``effects`` to ``input_path``.

        Args:
            result: Value computed by torchaudio for the same input.
            input_path: Path of the wav file that sox should process.
            effects: Effect arguments forwarded to ``run_sox_effect``.
            atol: Absolute tolerance forwarded to ``assertEqual``.
            rtol: Relative tolerance forwarded to ``assertEqual``.
        """
        # Only the first element of the loaded pair is compared.
        reference, _ = self.run_sox_effect(input_path, effects)
        self.assertEqual(result, reference, atol=atol, rtol=rtol)

    def get_whitenoise(self, sample_rate=8000):
        """Create white noise, store it as a temporary wav, and return both.

        Inside this method the bare name ``get_whitenoise`` resolves to the
        module-level helper imported from ``common_utils``, not to this method.

        Args:
            sample_rate: Sample rate used for generating and saving the noise.

        Returns:
            Tuple ``(noise, path)`` of the generated data and the wav file path.
        """
        waveform = get_whitenoise(sample_rate=sample_rate, duration=3, scale_factor=0.9)
        wav_path = self.get_temp_path("whitenoise.wav")
        save_wav(wav_path, waveform, sample_rate)
        return waveform, wav_path

    def test_gain(self):
        # F.gain(data, 3) is compared with sox's `gain 3` on the asset file.
        asset = get_asset_path('steam-train-whistle-daniel_simon.wav')
        waveform, _ = load_wav(asset)
        processed = F.gain(waveform, 3)
        self.assert_sox_effect(processed, asset, ['gain', 3])

    def test_dither(self):
        # F.dither with default arguments is compared with sox's `dither`.
        asset = get_asset_path('steam-train-whistle-daniel_simon.wav')
        waveform, _ = load_wav(asset)
        processed = F.dither(waveform)
        self.assert_sox_effect(processed, asset, ['dither'])

    def test_dither_noise(self):
        # noise_shaping=True is paired with sox's `dither -s`.
        asset = get_asset_path('steam-train-whistle-daniel_simon.wav')
        waveform, _ = load_wav(asset)
        processed = F.dither(waveform, noise_shaping=True)
        self.assert_sox_effect(processed, asset, ['dither', '-s'], atol=1.5e-4)

    def test_lowpass(self):
        # F.lowpass_biquad is compared with sox's `lowpass <cutoff>`.
        sample_rate, cutoff_freq = 8000, 3000
        waveform, wav_path = self.get_whitenoise(sample_rate)
        filtered = F.lowpass_biquad(waveform, sample_rate, cutoff_freq)
        self.assert_sox_effect(filtered, wav_path, ['lowpass', cutoff_freq], atol=1.5e-4)

    def test_highpass(self):
        # F.highpass_biquad is compared with sox's `highpass <cutoff>`.
        sample_rate, cutoff_freq = 8000, 2000
        waveform, wav_path = self.get_whitenoise(sample_rate)
        filtered = F.highpass_biquad(waveform, sample_rate, cutoff_freq)
        self.assert_sox_effect(filtered, wav_path, ['highpass', cutoff_freq], atol=1.5e-4)

    def test_allpass(self):
        # F.allpass_biquad is compared with sox's `allpass <freq> <q>q`.
        sample_rate = 8000
        central_freq, q = 1000, 0.707
        waveform, wav_path = self.get_whitenoise(sample_rate)
        filtered = F.allpass_biquad(waveform, sample_rate, central_freq, q)
        self.assert_sox_effect(filtered, wav_path, ['allpass', central_freq, f'{q}q'])

    def test_bandpass_with_csg(self):
        # const_skirt_gain=True is paired with sox's `bandpass -c`.
        sample_rate = 8000
        central_freq, q = 1000, 0.707
        const_skirt_gain = True
        waveform, wav_path = self.get_whitenoise(sample_rate)
        filtered = F.bandpass_biquad(waveform, sample_rate, central_freq, q, const_skirt_gain)
        self.assert_sox_effect(filtered, wav_path, ['bandpass', '-c', central_freq, f'{q}q'])

    def test_bandpass_without_csg(self):
        # const_skirt_gain=False is paired with plain sox `bandpass`.
        sample_rate = 8000
        central_freq, q = 1000, 0.707
        const_skirt_gain = False
        waveform, wav_path = self.get_whitenoise(sample_rate)
        filtered = F.bandpass_biquad(waveform, sample_rate, central_freq, q, const_skirt_gain)
        self.assert_sox_effect(filtered, wav_path, ['bandpass', central_freq, f'{q}q'])

    def test_bandreject(self):
        # F.bandreject_biquad is compared with sox's `bandreject <freq> <q>q`.
        sample_rate = 8000
        central_freq, q = 1000, 0.707
        waveform, wav_path = self.get_whitenoise(sample_rate)
        filtered = F.bandreject_biquad(waveform, sample_rate, central_freq, q)
        self.assert_sox_effect(filtered, wav_path, ['bandreject', central_freq, f'{q}q'])

    def test_band_with_noise(self):
        # noise=True is paired with sox's `band -n`.
        sample_rate = 8000
        central_freq, q = 1000, 0.707
        noise = True
        waveform, wav_path = self.get_whitenoise(sample_rate)
        filtered = F.band_biquad(waveform, sample_rate, central_freq, q, noise)
        self.assert_sox_effect(filtered, wav_path, ['band', '-n', central_freq, f'{q}q'])

    def test_band_without_noise(self):
        # noise=False is paired with plain sox `band`.
        sample_rate = 8000
        central_freq, q = 1000, 0.707
        noise = False
        waveform, wav_path = self.get_whitenoise(sample_rate)
        filtered = F.band_biquad(waveform, sample_rate, central_freq, q, noise)
        self.assert_sox_effect(filtered, wav_path, ['band', central_freq, f'{q}q'])

    def test_treble(self):
        # F.treble_biquad is compared with sox's `treble <gain> <freq> <q>q`.
        sample_rate = 8000
        central_freq, q, gain = 1000, 0.707, 40
        waveform, wav_path = self.get_whitenoise(sample_rate)
        filtered = F.treble_biquad(waveform, sample_rate, gain, central_freq, q)
        self.assert_sox_effect(filtered, wav_path, ['treble', gain, central_freq, f'{q}q'])

    def test_bass(self):
        # F.bass_biquad is compared with sox's `bass <gain> <freq> <q>q`.
        sample_rate = 8000
        central_freq, q, gain = 1000, 0.707, 40
        waveform, wav_path = self.get_whitenoise(sample_rate)
        filtered = F.bass_biquad(waveform, sample_rate, gain, central_freq, q)
        self.assert_sox_effect(filtered, wav_path, ['bass', gain, central_freq, f'{q}q'], atol=1.5e-4)

    def test_deemph(self):
        # F.deemph_biquad is compared with sox's `deemph` on 44100-rate noise.
        sample_rate = 44100
        waveform, wav_path = self.get_whitenoise(sample_rate)
        filtered = F.deemph_biquad(waveform, sample_rate)
        self.assert_sox_effect(filtered, wav_path, ['deemph'])

    def test_riaa(self):
        # F.riaa_biquad is compared with sox's `riaa` on 44100-rate noise.
        sample_rate = 44100
        waveform, wav_path = self.get_whitenoise(sample_rate)
        filtered = F.riaa_biquad(waveform, sample_rate)
        self.assert_sox_effect(filtered, wav_path, ['riaa'])

    def test_contrast(self):
        # F.contrast is compared with sox's `contrast <amount>`.
        enhancement_amount = 80.
        waveform, wav_path = self.get_whitenoise()
        processed = F.contrast(waveform, enhancement_amount)
        self.assert_sox_effect(processed, wav_path, ['contrast', enhancement_amount])

    def test_dcshift_with_limiter(self):
        # Both shift and limiter gain are given to F.dcshift and to sox.
        shift, limiter_gain = 0.5, 0.05
        waveform, wav_path = self.get_whitenoise()
        processed = F.dcshift(waveform, shift, limiter_gain)
        self.assert_sox_effect(processed, wav_path, ['dcshift', shift, limiter_gain])

    def test_dcshift_without_limiter(self):
        # Only the shift is given; no limiter gain on either side.
        shift = 0.6
        waveform, wav_path = self.get_whitenoise()
        processed = F.dcshift(waveform, shift)
        self.assert_sox_effect(processed, wav_path, ['dcshift', shift])

    def test_overdrive(self):
        # F.overdrive is compared with sox's `overdrive <gain> <colour>`.
        gain, colour = 30, 40
        waveform, wav_path = self.get_whitenoise()
        processed = F.overdrive(waveform, gain, colour)
        self.assert_sox_effect(processed, wav_path, ['overdrive', gain, colour])

    def test_phaser_sine(self):
        # sinusoidal=True is paired with sox's `-s` flag.
        sample_rate = 8000
        # Positional order shared by F.phaser and sox:
        # gain_in, gain_out, delay_ms, decay, speed
        params = (0.5, 0.8, 2.0, 0.4, 0.5)
        waveform, wav_path = self.get_whitenoise(sample_rate)
        processed = F.phaser(waveform, sample_rate, *params, sinusoidal=True)
        self.assert_sox_effect(processed, wav_path, ['phaser', *params, '-s'])

    def test_phaser_triangle(self):
        # sinusoidal=False is paired with sox's `-t` flag.
        sample_rate = 8000
        # Positional order shared by F.phaser and sox:
        # gain_in, gain_out, delay_ms, decay, speed
        params = (0.5, 0.8, 2.0, 0.4, 0.5)
        waveform, wav_path = self.get_whitenoise(sample_rate)
        processed = F.phaser(waveform, sample_rate, *params, sinusoidal=False)
        self.assert_sox_effect(processed, wav_path, ['phaser', *params, '-t'])

    def test_flanger_triangle_linear(self):
        # modulation='triangular' is paired with sox's 'triangle' keyword.
        sample_rate = 8000
        delay, depth, regen, width, speed = 0.6, 0.87, 3.0, 0.9, 0.5
        phase = 30
        waveform, wav_path = self.get_whitenoise(sample_rate)
        processed = F.flanger(
            waveform, sample_rate, delay, depth, regen, width, speed, phase,
            modulation='triangular', interpolation='linear')
        sox_effect = ['flanger', delay, depth, regen, width, speed, 'triangle', phase, 'linear']
        self.assert_sox_effect(processed, wav_path, sox_effect)

    def test_flanger_triangle_quad(self):
        # Triangular modulation with quadratic interpolation.
        sample_rate = 8000
        delay, depth, regen, width, speed = 0.8, 0.88, 3.0, 0.4, 0.5
        phase = 40
        waveform, wav_path = self.get_whitenoise(sample_rate)
        processed = F.flanger(
            waveform, sample_rate, delay, depth, regen, width, speed, phase,
            modulation='triangular', interpolation='quadratic')
        sox_effect = ['flanger', delay, depth, regen, width, speed, 'triangle', phase, 'quadratic']
        self.assert_sox_effect(processed, wav_path, sox_effect)

    def test_flanger_sine_linear(self):
        # modulation='sinusoidal' is paired with sox's 'sine' keyword.
        sample_rate = 8000
        delay, depth, regen, width, speed = 0.8, 0.88, 3.0, 0.23, 1.3
        phase = 60
        waveform, wav_path = self.get_whitenoise(sample_rate)
        processed = F.flanger(
            waveform, sample_rate, delay, depth, regen, width, speed, phase,
            modulation='sinusoidal', interpolation='linear')
        sox_effect = ['flanger', delay, depth, regen, width, speed, 'sine', phase, 'linear']
        self.assert_sox_effect(processed, wav_path, sox_effect)

    def test_flanger_sine_quad(self):
        # Sinusoidal modulation with quadratic interpolation.
        sample_rate = 8000
        delay, depth, regen, width, speed = 0.9, 0.9, 4.0, 0.23, 1.3
        phase = 25
        waveform, wav_path = self.get_whitenoise(sample_rate)
        processed = F.flanger(
            waveform, sample_rate, delay, depth, regen, width, speed, phase,
            modulation='sinusoidal', interpolation='quadratic')
        sox_effect = ['flanger', delay, depth, regen, width, speed, 'sine', phase, 'quadratic']
        self.assert_sox_effect(processed, wav_path, sox_effect)

    def test_equalizer(self):
        # Argument order differs: F takes (center_freq, gain, q), sox takes
        # (center_freq, q, gain).
        sample_rate = 8000
        center_freq, q, gain = 300, 0.707, 1
        waveform, wav_path = self.get_whitenoise(sample_rate)
        filtered = F.equalizer_biquad(waveform, sample_rate, center_freq, gain, q)
        self.assert_sox_effect(filtered, wav_path, ['equalizer', center_freq, q, gain])

    def test_perf_biquad_filtering(self):
        # sox's `biquad` takes b0 b1 b2 a0 a1 a2, whereas F.lfilter is called
        # with the a-coefficients first and the b-coefficients second.
        b_coeffs = [0.4, 0.2, 0.9]
        a_coeffs = [0.7, 0.2, 0.6]
        waveform, wav_path = self.get_whitenoise()
        filtered = F.lfilter(waveform, torch.tensor(a_coeffs), torch.tensor(b_coeffs))
        self.assert_sox_effect(filtered, wav_path, ['biquad', *b_coeffs, *a_coeffs])

    @parameterized.expand([
        ('q', 'quarter_sine'),
        ('h', 'half_sine'),
        ('t', 'linear'),
    ])
    def test_fade(self, fade_shape_sox, fade_shape):
        # T.Fade gets fade lengths of 44100 on noise generated at a 44100
        # sample rate; sox gets the shape letter followed by '1', '0', '1'.
        fade_in_len = 44100
        fade_out_len = 44100
        waveform, wav_path = self.get_whitenoise(sample_rate=44100)
        fade = T.Fade(fade_in_len, fade_out_len, fade_shape)
        self.assert_sox_effect(fade(waveform), wav_path, ['fade', fade_shape_sox, '1', '0', '1'])

    @parameterized.expand([
        ('amplitude', 1.1),
        ('db', 2),
        ('power', 2),
    ])
    def test_vol(self, gain_type, gain):
        # T.Vol is compared with sox's `vol <gain> <gain_type>`.
        waveform, wav_path = self.get_whitenoise()
        vol = T.Vol(gain, gain_type)
        self.assert_sox_effect(vol(waveform), wav_path, ['vol', str(gain), gain_type])

    @parameterized.expand(['vad-go-stereo-44100.wav', 'vad-go-mono-32000.wav'])
    def test_vad(self, filename):
        # T.Vad, built with the file's own sample rate, is compared with sox's `vad`.
        asset = get_asset_path(filename)
        waveform, sample_rate = load_wav(asset)
        vad = T.Vad(sample_rate)
        self.assert_sox_effect(vad(waveform), asset, ['vad'])