test_batch_consistency.py 11.1 KB
Newer Older
1
2
3
4
"""Test numerical consistency among single input and batched input."""
import unittest

import torch
5
from torch.testing._internal.common_utils import TestCase
6
7
8
import torchaudio
import torchaudio.functional as F

9
from . import common_utils
10
11


12
class TestFunctional(TestCase):
13
    """Test functions defined in `functional` module"""
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
    def assert_batch_consistency(
            self, functional, tensor, *args, batch_size=1, atol=1e-8, rtol=1e-5, seed=42, **kwargs):
        # run then batch the result
        torch.random.manual_seed(seed)
        expected = functional(tensor.clone(), *args, **kwargs)
        expected = expected.repeat([batch_size] + [1] * expected.dim())

        # batch the input and run
        torch.random.manual_seed(seed)
        pattern = [batch_size] + [1] * tensor.dim()
        computed = functional(tensor.repeat(pattern), *args, **kwargs)

        self.assertEqual(computed, expected, rtol=rtol, atol=atol)

    def assert_batch_consistencies(
            self, functional, tensor, *args, atol=1e-8, rtol=1e-5, seed=42, **kwargs):
        self.assert_batch_consistency(
            functional, tensor, *args, batch_size=1, atol=atol, rtol=rtol, seed=seed, **kwargs)
        self.assert_batch_consistency(
            functional, tensor, *args, batch_size=3, atol=atol, rtol=rtol, seed=seed, **kwargs)

35
36
37
38
39
40
41
42
43
44
45
    def test_griffinlim(self):
        n_fft = 400
        ws = 400
        hop = 200
        window = torch.hann_window(ws)
        power = 2
        normalize = False
        momentum = 0.99
        n_iter = 32
        length = 1000
        tensor = torch.rand((1, 201, 6))
46
        self.assert_batch_consistencies(
47
48
49
50
51
52
53
54
55
56
57
            F.griffinlim, tensor, window, n_fft, hop, ws, power, normalize, n_iter, momentum, length, 0, atol=5e-5
        )

    def test_detect_pitch_frequency(self):
        filenames = [
            'steam-train-whistle-daniel_simon.wav',  # 2ch 44100Hz
            # Files from https://www.mediacollege.com/audio/tone/download/
            '100Hz_44100Hz_16bit_05sec.wav',  # 1ch
            '440Hz_44100Hz_16bit_05sec.wav',  # 1ch
        ]
        for filename in filenames:
58
            filepath = common_utils.get_asset_path(filename)
59
            waveform, sample_rate = torchaudio.load(filepath)
60
            self.assert_batch_consistencies(F.detect_pitch_frequency, waveform, sample_rate)
61
62
63
64
65
66
67

    def test_istft(self):
        stft = torch.tensor([
            [[4., 0.], [4., 0.], [4., 0.], [4., 0.], [4., 0.]],
            [[0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.]],
            [[0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.]]
        ])
68
        self.assert_batch_consistencies(F.istft, stft, n_fft=4, length=4)
69

70
71
    def test_contrast(self):
        waveform = torch.rand(2, 100) - 0.5
72
        self.assert_batch_consistencies(F.contrast, waveform, enhancement_amount=80.)
73
74
75

    def test_dcshift(self):
        waveform = torch.rand(2, 100) - 0.5
76
        self.assert_batch_consistencies(F.dcshift, waveform, shift=0.5, limiter_gain=0.05)
77

78
79
    def test_overdrive(self):
        waveform = torch.rand(2, 100) - 0.5
80
        self.assert_batch_consistencies(F.overdrive, waveform, gain=45, colour=30)
81

82
83
84
    def test_phaser(self):
        filepath = common_utils.get_asset_path("whitenoise.wav")
        waveform, sample_rate = torchaudio.load(filepath)
85
        self.assert_batch_consistencies(F.phaser, waveform, sample_rate)
86

87
88
89
90
91
92
    def test_flanger(self):
        torch.random.manual_seed(40)
        waveform = torch.rand(2, 100) - 0.5
        sample_rate = 44100
        self.assert_batch_consistencies(F.flanger, waveform, sample_rate)

93
94
    def test_sliding_window_cmn(self):
        waveform = torch.randn(2, 1024) - 0.5
95
96
97
98
        self.assert_batch_consistencies(F.sliding_window_cmn, waveform, center=True, norm_vars=True)
        self.assert_batch_consistencies(F.sliding_window_cmn, waveform, center=True, norm_vars=False)
        self.assert_batch_consistencies(F.sliding_window_cmn, waveform, center=False, norm_vars=True)
        self.assert_batch_consistencies(F.sliding_window_cmn, waveform, center=False, norm_vars=False)
Artyom Astafurov's avatar
Artyom Astafurov committed
99
100

    def test_vad(self):
101
        filepath = common_utils.get_asset_path("vad-go-mono-32000.wav")
Artyom Astafurov's avatar
Artyom Astafurov committed
102
        waveform, sample_rate = torchaudio.load(filepath)
103
        self.assert_batch_consistencies(F.vad, waveform, sample_rate=sample_rate)
104

105

106
class TestTransforms(TestCase):
107
108
109
110
111
112
113
114
115
116
    """Test suite for classes defined in `transforms` module"""
    def test_batch_AmplitudeToDB(self):
        spec = torch.rand((6, 201))

        # Single then transform then batch
        expected = torchaudio.transforms.AmplitudeToDB()(spec).repeat(3, 1, 1)

        # Batch then transform
        computed = torchaudio.transforms.AmplitudeToDB()(spec.repeat(3, 1, 1))

117
        self.assertEqual(computed, expected)
118
119
120
121
122
123
124
125
126
127

    def test_batch_Resample(self):
        waveform = torch.randn(2, 2786)

        # Single then transform then batch
        expected = torchaudio.transforms.Resample()(waveform).repeat(3, 1, 1)

        # Batch then transform
        computed = torchaudio.transforms.Resample()(waveform.repeat(3, 1, 1))

128
        self.assertEqual(computed, expected)
129
130
131
132
133
134
135
136
137
138
139

    def test_batch_MelScale(self):
        specgram = torch.randn(2, 31, 2786)

        # Single then transform then batch
        expected = torchaudio.transforms.MelScale()(specgram).repeat(3, 1, 1, 1)

        # Batch then transform
        computed = torchaudio.transforms.MelScale()(specgram.repeat(3, 1, 1, 1))

        # shape = (3, 2, 201, 1394)
140
        self.assertEqual(computed, expected)
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156

    def test_batch_InverseMelScale(self):
        n_mels = 32
        n_stft = 5
        mel_spec = torch.randn(2, n_mels, 32) ** 2

        # Single then transform then batch
        expected = torchaudio.transforms.InverseMelScale(n_stft, n_mels)(mel_spec).repeat(3, 1, 1, 1)

        # Batch then transform
        computed = torchaudio.transforms.InverseMelScale(n_stft, n_mels)(mel_spec.repeat(3, 1, 1, 1))

        # shape = (3, 2, n_mels, 32)

        # Because InverseMelScale runs SGD on randomly initialized values so they do not yield
        # exactly same result. For this reason, tolerance is very relaxed here.
157
        self.assertEqual(computed, expected, atol=1.0, rtol=1e-5)
158
159
160
161
162
163
164
165
166
167
168

    def test_batch_compute_deltas(self):
        specgram = torch.randn(2, 31, 2786)

        # Single then transform then batch
        expected = torchaudio.transforms.ComputeDeltas()(specgram).repeat(3, 1, 1, 1)

        # Batch then transform
        computed = torchaudio.transforms.ComputeDeltas()(specgram.repeat(3, 1, 1, 1))

        # shape = (3, 2, 201, 1394)
169
        self.assertEqual(computed, expected)
170
171

    def test_batch_mulaw(self):
172
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
173
174
175
176
177
178
179
180
181
182
183
        waveform, _ = torchaudio.load(test_filepath)  # (2, 278756), 44100

        # Single then transform then batch
        waveform_encoded = torchaudio.transforms.MuLawEncoding()(waveform)
        expected = waveform_encoded.unsqueeze(0).repeat(3, 1, 1)

        # Batch then transform
        waveform_batched = waveform.unsqueeze(0).repeat(3, 1, 1)
        computed = torchaudio.transforms.MuLawEncoding()(waveform_batched)

        # shape = (3, 2, 201, 1394)
184
        self.assertEqual(computed, expected)
185
186
187
188
189
190
191
192
193

        # Single then transform then batch
        waveform_decoded = torchaudio.transforms.MuLawDecoding()(waveform_encoded)
        expected = waveform_decoded.unsqueeze(0).repeat(3, 1, 1)

        # Batch then transform
        computed = torchaudio.transforms.MuLawDecoding()(computed)

        # shape = (3, 2, 201, 1394)
194
        self.assertEqual(computed, expected)
195
196

    def test_batch_spectrogram(self):
197
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
198
199
200
201
202
203
204
        waveform, _ = torchaudio.load(test_filepath)  # (2, 278756), 44100

        # Single then transform then batch
        expected = torchaudio.transforms.Spectrogram()(waveform).repeat(3, 1, 1, 1)

        # Batch then transform
        computed = torchaudio.transforms.Spectrogram()(waveform.repeat(3, 1, 1))
205
        self.assertEqual(computed, expected)
206
207

    def test_batch_melspectrogram(self):
208
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
209
210
211
212
213
214
215
        waveform, _ = torchaudio.load(test_filepath)  # (2, 278756), 44100

        # Single then transform then batch
        expected = torchaudio.transforms.MelSpectrogram()(waveform).repeat(3, 1, 1, 1)

        # Batch then transform
        computed = torchaudio.transforms.MelSpectrogram()(waveform.repeat(3, 1, 1))
216
        self.assertEqual(computed, expected)
217
218

    def test_batch_mfcc(self):
moto's avatar
moto committed
219
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
220
221
222
223
224
225
226
        waveform, _ = torchaudio.load(test_filepath)

        # Single then transform then batch
        expected = torchaudio.transforms.MFCC()(waveform).repeat(3, 1, 1, 1)

        # Batch then transform
        computed = torchaudio.transforms.MFCC()(waveform.repeat(3, 1, 1))
227
        self.assertEqual(computed, expected, atol=1e-4, rtol=1e-5)
228
229

    def test_batch_TimeStretch(self):
230
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
        waveform, _ = torchaudio.load(test_filepath)  # (2, 278756), 44100

        kwargs = {
            'n_fft': 2048,
            'hop_length': 512,
            'win_length': 2048,
            'window': torch.hann_window(2048),
            'center': True,
            'pad_mode': 'reflect',
            'normalized': True,
            'onesided': True,
        }
        rate = 2

        complex_specgrams = torch.stft(waveform, **kwargs)

        # Single then transform then batch
        expected = torchaudio.transforms.TimeStretch(
            fixed_rate=rate,
            n_freq=1025,
            hop_length=512,
        )(complex_specgrams).repeat(3, 1, 1, 1, 1)

        # Batch then transform
        computed = torchaudio.transforms.TimeStretch(
            fixed_rate=rate,
            n_freq=1025,
            hop_length=512,
        )(complex_specgrams.repeat(3, 1, 1, 1, 1))

261
        self.assertEqual(computed, expected, atol=1e-5, rtol=1e-5)
262
263

    def test_batch_Fade(self):
264
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
265
266
267
268
269
270
271
272
273
        waveform, _ = torchaudio.load(test_filepath)  # (2, 278756), 44100
        fade_in_len = 3000
        fade_out_len = 3000

        # Single then transform then batch
        expected = torchaudio.transforms.Fade(fade_in_len, fade_out_len)(waveform).repeat(3, 1, 1)

        # Batch then transform
        computed = torchaudio.transforms.Fade(fade_in_len, fade_out_len)(waveform.repeat(3, 1, 1))
274
        self.assertEqual(computed, expected)
275
276

    def test_batch_Vol(self):
277
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
278
279
280
281
282
283
284
        waveform, _ = torchaudio.load(test_filepath)  # (2, 278756), 44100

        # Single then transform then batch
        expected = torchaudio.transforms.Vol(gain=1.1)(waveform).repeat(3, 1, 1)

        # Batch then transform
        computed = torchaudio.transforms.Vol(gain=1.1)(waveform.repeat(3, 1, 1))
285
        self.assertEqual(computed, expected)
Vincent QB's avatar
Vincent QB committed
286
287
288
289


if __name__ == '__main__':
    unittest.main()