test_sox_effects.py 13 KB
Newer Older
David Pollack's avatar
David Pollack committed
1
2
3
4
5
import unittest
import torch
import torchaudio
import math

6
7
from . import common_utils
from .common_utils import AudioBackendScope, BACKENDS
David Pollack's avatar
David Pollack committed
8

9

David Pollack's avatar
David Pollack committed
10
class Test_SoxEffectsChain(unittest.TestCase):
    """Tests that drive ``torchaudio.sox_effects.SoxEffectsChain``.

    Every test is skipped when "sox" is not listed in ``BACKENDS`` and runs
    inside ``AudioBackendScope("sox")``.
    """

    # Input asset shared by the tests that do not name their own file.
    test_filepath = common_utils.get_asset_path("steam-train-whistle-daniel_simon.mp3")
David Pollack's avatar
David Pollack committed
12

13
14
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_single_channel(self):
        """Smoke test: apply the "echos" effect to the sine-wave asset.

        Nothing is asserted about the output; the test fails only if building
        or running the chain raises (or the result is not a 2-tuple).
        """
        sine_path = common_utils.get_asset_path("sinewave.wav")
        echo_args = [0.8, 0.7, 40, 0.25, 63, 0.3]

        chain = torchaudio.sox_effects.SoxEffectsChain()
        chain.set_input_file(sine_path)
        chain.append_effect_to_chain("echos", echo_args)
        _waveform, _sample_rate = chain.sox_build_flow_effects()
David Pollack's avatar
David Pollack committed
23

24
25
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_rate_channels(self):
        """Apply "rate" and "channels" and check the reported rate and channel count."""
        want_rate, want_channels = 16000, 1

        chain = torchaudio.sox_effects.SoxEffectsChain()
        chain.set_input_file(self.test_filepath)
        for effect_name, value in (("rate", want_rate), ("channels", want_channels)):
            chain.append_effect_to_chain(effect_name, [value])
        waveform, out_rate = chain.sox_build_flow_effects()

        # The returned sample rate and the first tensor dimension must match the request.
        self.assertEqual(out_rate, want_rate)
        self.assertEqual(waveform.size(0), want_channels)

38
39
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_lowpass_speed(self):
        """Chain "lowpass", "speed" and "rate" and check the resulting frame count."""
        factor = 0.8
        info, _ = torchaudio.info(self.test_filepath)

        chain = torchaudio.sox_effects.SoxEffectsChain()
        chain.set_input_file(self.test_filepath)
        # Effect arguments are passed here as bare scalars, not wrapped in lists.
        for effect_name, scalar_arg in (("lowpass", 100), ("speed", factor), ("rate", info.rate)):
            chain.append_effect_to_chain(effect_name, scalar_arg)
        waveform, _ = chain.sox_build_flow_effects()

        # Expected length is the per-channel input length scaled by 1 / factor.
        frames_in = info.length / info.channels
        self.assertEqual(waveform.size(1), int(frames_in / factor))

52
53
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_ulaw_and_siginfo(self):
        """Build a chain with explicit output signal/encoding info and check the result."""
        si_in, _ = torchaudio.info(self.test_filepath)

        # Requested output signal: 8-bit precision, 44100 Hz, 2 channels.
        out_sig = torchaudio.sox_signalinfo_t()
        out_sig.precision = 8
        out_sig.rate = 44100
        out_sig.channels = 2

        # Requested output encoding: 8 bits per sample, encoding id 9
        # (presumably u-law, going by the note below -- TODO confirm).
        out_enc = torchaudio.sox_encodinginfo_t()
        out_enc.encoding = torchaudio.get_sox_encoding_t(9)
        out_enc.bits_per_sample = 8

        chain = torchaudio.sox_effects.SoxEffectsChain(out_siginfo=out_sig, out_encinfo=out_enc)
        chain.set_input_file(self.test_filepath)
        waveform, _ = chain.sox_build_flow_effects()

        # Note: the output is taken to be ulaw-encoded when it holds no more
        #       than 256 distinct sample values.
        distinct_values = waveform.unique().size(0)
        self.assertLessEqual(distinct_values, 2 ** 8)
        self.assertEqual(waveform.numel(), si_in.length)

71
72
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_band_chorus(self):
        """Apply "band" and "chorus" and check the output is not shorter than the input."""
        si_in, ei_in = torchaudio.info(self.test_filepath)
        ei_in.encoding = torchaudio.get_sox_encoding_t(1)

        chain = torchaudio.sox_effects.SoxEffectsChain(out_encinfo=ei_in, out_siginfo=si_in)
        chain.set_input_file(self.test_filepath)
        effects = (
            ("band", ["-n", "10k", "3.5k"]),
            ("chorus", [.5, .7, 55, 0.4, .25, 2, '-s']),
            # Restore the input's own rate and channel count at the end of the chain.
            ("rate", [si_in.rate]),
            ("channels", [si_in.channels]),
        )
        for effect_name, effect_args in effects:
            chain.append_effect_to_chain(effect_name, effect_args)
        waveform, _ = chain.sox_build_flow_effects()

        # The chorus effect is expected to make the output at least as long as the input.
        n_channels, n_frames = waveform.size(0), waveform.size(1)
        self.assertEqual(n_channels, si_in.channels)
        self.assertGreaterEqual(n_frames * n_channels, si_in.length)
David Pollack's avatar
David Pollack committed
86

87
88
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_synth(self):
        """Mix synthesized pink noise into the input and check the output size is unchanged."""
        si_in, ei_in = torchaudio.info(self.test_filepath)
        # Duration of the input in seconds, derived from total samples, channels and rate.
        duration_sec = si_in.length / si_in.channels / si_in.rate
        ei_in.encoding = torchaudio.get_sox_encoding_t(1)

        chain = torchaudio.sox_effects.SoxEffectsChain(out_encinfo=ei_in, out_siginfo=si_in)
        chain.set_input_file(self.test_filepath)
        chain.append_effect_to_chain("synth", [str(duration_sec), "pinknoise", "mix"])
        # NOTE(review): rate and channel count are hard-coded, so the assertions
        # below assume the asset is 2-channel / 44100 Hz -- TODO confirm.
        chain.append_effect_to_chain("rate", [44100])
        chain.append_effect_to_chain("channels", [2])
        waveform, _ = chain.sox_build_flow_effects()

        n_channels, n_frames = waveform.size(0), waveform.size(1)
        self.assertEqual(n_channels, si_in.channels)
        self.assertEqual(si_in.length, n_channels * n_frames)
David Pollack's avatar
David Pollack committed
101

102
103
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_gain(self):
        """Run several "gain" configurations and check the peak amplitude of each.

        One chain object is reused for all configurations; it is cleared after
        every build and given its input file again before the next one.
        """
        chain = torchaudio.sox_effects.SoxEffectsChain()

        def peak_after_gain(gain_args):
            # Build the chain with a single "gain" effect and return max(|x|).
            chain.set_input_file(self.test_filepath)
            chain.append_effect_to_chain("gain", gain_args)
            waveform, _ = chain.sox_build_flow_effects()
            chain.clear_chain()
            return waveform.abs().max().item()

        # Boosting cases: compare against full scale explicitly.
        # ``assertTrue(peak, 1.)`` would treat ``1.`` as the failure message and
        # only check that the peak is non-zero.
        # NOTE(review): assumes the boosted signal reaches full scale (clips)
        # for this asset -- TODO confirm.
        self.assertAlmostEqual(peak_after_gain(["5"]), 1., places=4)
        # Attenuating cases: the peak must stay below full scale.
        self.assertLess(peak_after_gain(["-e", "-5"]), 1.)
        self.assertAlmostEqual(peak_after_gain(["-b", "8"]), 1., places=4)
        self.assertLess(peak_after_gain(["-n", "-10"]), 1.)

127
128
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_tempo_or_speed(self):
        """Check the output length after "tempo" and "speed" with factors below and above 1.

        The same chain object is reused; it is cleared between cases without
        setting the input file again.
        """
        info, _ = torchaudio.info(self.test_filepath)
        frames_in = info.length / info.channels

        chain = torchaudio.sox_effects.SoxEffectsChain()
        chain.set_input_file(self.test_filepath)

        # Each case: (factor, effects appended for that case).
        cases = (
            (.8, (("tempo", ["-s", .8]),)),                        # tempo < 1
            (1.2, (("tempo", ["-s", 1.2]),)),                      # tempo > 1
            (1.2, (("speed", [1.2]), ("rate", [info.rate]))),      # speed > 1
            (0.8, (("speed", [0.8]), ("rate", [info.rate]))),      # speed < 1
        )
        for case_index, (factor, effects) in enumerate(cases):
            if case_index > 0:
                chain.clear_chain()
            for effect_name, effect_args in effects:
                chain.append_effect_to_chain(effect_name, effect_args)
            waveform, _ = chain.sox_build_flow_effects()
            # Output length should be the input length scaled by 1 / factor,
            # within one frame.
            expected_frames = math.ceil(frames_in / factor)
            self.assertAlmostEqual(waveform.size(1), expected_frames, delta=1)
David Pollack's avatar
David Pollack committed
161

162
163
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_trim(self):
        """Trim by sample offsets and compare with the same slice of the loaded file."""
        reference, _ = torchaudio.load(self.test_filepath)

        # Both values carry an "s" suffix; the integer part is used for slicing.
        offset = "10000s"
        num_frames = "20000s"
        start = int(offset[:-1])
        stop = start + int(num_frames[:-1])

        chain = torchaudio.sox_effects.SoxEffectsChain()
        chain.set_input_file(self.test_filepath)
        chain.append_effect_to_chain("trim", [offset, num_frames])
        trimmed, _ = chain.sox_build_flow_effects()

        expected = reference[:, start:stop]
        self.assertTrue(trimmed.allclose(expected, rtol=1e-4, atol=1e-4))
David Pollack's avatar
David Pollack committed
176

177
178
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_silence_contrast(self):
        """Apply "silence" then "contrast" and check the output has fewer samples."""
        info, _ = torchaudio.info(self.test_filepath)

        chain = torchaudio.sox_effects.SoxEffectsChain()
        chain.set_input_file(self.test_filepath)
        for effect_name, effect_args in (("silence", [1, 100, 1]), ("contrast", [])):
            chain.append_effect_to_chain(effect_name, effect_args)
        waveform, _ = chain.sox_build_flow_effects()

        # The total sample count must be smaller than the input's.
        self.assertLess(waveform.numel(), info.length)

189
190
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_reverse(self):
        """Reverse the file with sox and check that un-reversing recovers the original."""
        reference, _ = torchaudio.load(self.test_filepath)

        chain = torchaudio.sox_effects.SoxEffectsChain()
        chain.set_input_file(self.test_filepath)
        chain.append_effect_to_chain("reverse", "")
        reversed_wave, _ = chain.sox_build_flow_effects()

        # Index the second dimension back-to-front to undo the reversal.
        n_frames = reference.size(1)
        back_to_front = torch.LongTensor(list(reversed(range(n_frames))))
        restored = reversed_wave[:, back_to_front]
        self.assertTrue(reference.allclose(restored, rtol=1e-5, atol=2e-5))

201
202
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_compand_fade(self):
        """Smoke test: chain "compand" and "fade".

        Nothing is asserted about the output; the test fails only if building
        or running the chain raises (or the result is not a 2-tuple).
        """
        compand_args = ["0.3,1", "6:-70,-60,-20", "-5", "-90", "0.2"]
        fade_args = ["q", "0.25", "0", "0.33"]

        chain = torchaudio.sox_effects.SoxEffectsChain()
        chain.set_input_file(self.test_filepath)
        chain.append_effect_to_chain("compand", compand_args)
        chain.append_effect_to_chain("fade", fade_args)
        _waveform, _ = chain.sox_build_flow_effects()
David Pollack's avatar
David Pollack committed
211

212
213
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_biquad_delay(self):
        """Apply "biquad" then a 15000-sample "delay" and check the output length."""
        delay_samples = 15000
        si, _ = torchaudio.info(self.test_filepath)

        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(self.test_filepath)
        E.append_effect_to_chain("biquad", ["0.25136437", "0.50272873", "0.25136437",
                                            "1.0", "-0.17123075", "0.17668821"])
        E.append_effect_to_chain("delay", ["15000s"])
        x, _ = E.sox_build_flow_effects()

        # The output should be longer than the per-channel input by exactly the delay.
        # assertEqual (rather than assertTrue(a == b)) reports both values on failure.
        self.assertEqual(x.size(1), (si.length / si.channels) + delay_samples)

225
226
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_invalid_effect_name(self):
        """Appending an effect with an unknown name must raise LookupError."""
        chain = torchaudio.sox_effects.SoxEffectsChain()
        chain.set_input_file(self.test_filepath)
        # "special" is not the name of any effect.
        self.assertRaises(LookupError, chain.append_effect_to_chain, "special", [""])

234
235
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_unimplemented_effect(self):
        """Appending the "spectrogram" effect must raise NotImplementedError."""
        chain = torchaudio.sox_effects.SoxEffectsChain()
        chain.set_input_file(self.test_filepath)
        # The sox spectrogram function is not implemented in torchaudio.
        self.assertRaises(NotImplementedError, chain.append_effect_to_chain, "spectrogram", [""])

243
244
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_invalid_effect_options(self):
        """Malformed "compand" options must raise RuntimeError when the chain is built."""
        # The first two options should have been combined into "0.3,1".
        malformed_args = ["0.3", "1", "6:-70,-60,-20", "-5", "-90", "0.2"]

        chain = torchaudio.sox_effects.SoxEffectsChain()
        chain.set_input_file(self.test_filepath)
        # Appending is expected to succeed; the error surfaces at build time.
        chain.append_effect_to_chain("compand", malformed_args)
        self.assertRaises(RuntimeError, chain.sox_build_flow_effects)

253
254
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_fade(self):
        """Compare the sox "fade" effect with ``torchaudio.transforms.Fade`` per fade shape."""
        reference, _ = torchaudio.load(self.test_filepath)
        # NOTE(review): sox is given fade lengths of 1 while the transform gets
        # 44100; presumably 1 second at a 44100 Hz asset -- TODO confirm.
        fade_in_len = fade_out_len = 44100

        # (sox shape flag, torchaudio shape name)
        shape_pairs = (("q", "quarter_sine"), ("h", "half_sine"), ("t", "linear"))
        for sox_shape, transform_shape in shape_pairs:
            chain = torchaudio.sox_effects.SoxEffectsChain()
            chain.set_input_file(self.test_filepath)
            chain.append_effect_to_chain("fade", [sox_shape, 1, "0", 1])
            sox_out, _ = chain.sox_build_flow_effects()

            transform = torchaudio.transforms.Fade(fade_in_len, fade_out_len, transform_shape)
            expected = transform(reference)

            # Both implementations must agree within tolerance.
            self.assertTrue(sox_out.allclose(expected, rtol=1e-4, atol=1e-4))

271
272
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_vol(self):
        """Compare the sox "vol" effect with ``torchaudio.transforms.Vol`` per gain type."""
        reference, _ = torchaudio.load(self.test_filepath)

        # (gain value, gain type)
        gain_cases = ((1.1, "amplitude"), (2, "db"), (2, "power"))
        for gain, gain_type in gain_cases:
            chain = torchaudio.sox_effects.SoxEffectsChain()
            chain.set_input_file(self.test_filepath)
            chain.append_effect_to_chain("vol", [gain, gain_type])
            sox_out, _ = chain.sox_build_flow_effects()

            transform = torchaudio.transforms.Vol(gain, gain_type)
            expected = transform(reference)

            # Both implementations must agree within tolerance.
            self.assertTrue(sox_out.allclose(expected, rtol=1e-4, atol=1e-4))
Tomás Osório's avatar
Tomás Osório committed
286

287
288
    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
    @AudioBackendScope("sox")
    def test_vad(self):
        """Compare the sox "vad" effect with ``torchaudio.transforms.Vad`` on two assets."""
        asset_names = ("vad-go-stereo-44100.wav", "vad-go-mono-32000.wav")
        sample_files = [common_utils.get_asset_path(name) for name in asset_names]

        for sample_file in sample_files:
            chain = torchaudio.sox_effects.SoxEffectsChain()
            chain.set_input_file(sample_file)
            # "vad" is appended without any arguments.
            chain.append_effect_to_chain("vad")
            sox_out, _ = chain.sox_build_flow_effects()

            reference, sample_rate = torchaudio.load(sample_file)
            transform = torchaudio.transforms.Vad(sample_rate)
            expected = transform(reference)

            # Both implementations must agree within tolerance.
            self.assertTrue(sox_out.allclose(expected, rtol=1e-4, atol=1e-4))

David Pollack's avatar
David Pollack committed
307

David Pollack's avatar
David Pollack committed
308
# Run the tests in this module when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()