"megatron/inference/text_generation_server.py" did not exist on "21d2b0fe695750f57742f950d36d3ebbf4ab4992"
test_batch_consistency.py 11.1 KB
Newer Older
1
2
3
4
5
6
7
"""Test numerical consistency among single input and batched input."""
import unittest

import torch
import torchaudio
import torchaudio.functional as F

8
from . import common_utils
9
10


moto's avatar
moto committed
11
12
class TestFunctional(common_utils.TorchaudioTestCase):
    """Test functions defined in `functional` module"""
    backend = 'default'

    def assert_batch_consistency(
            self, functional, tensor, *args, batch_size=1, atol=1e-8, rtol=1e-5, seed=42, **kwargs):
        """Assert that ``functional`` gives the same result on a single input and on a batch.

        The function is run once on ``tensor`` and the result is tiled along a new
        leading dimension; it is then run on ``tensor`` tiled the same way, and the
        two results are compared.

        Args:
            functional: Callable under test; invoked as ``functional(tensor, *args, **kwargs)``.
            tensor: Unbatched input tensor.
            *args: Extra positional arguments forwarded to ``functional``.
            batch_size: Number of copies along the new leading batch dimension.
            atol: Absolute tolerance passed to ``assertEqual``.
            rtol: Relative tolerance passed to ``assertEqual``.
            seed: Seed applied before each invocation so both runs start from
                the same random state.
            **kwargs: Extra keyword arguments forwarded to ``functional``.
        """
        # Reference path: run on the single input, then tile the output.
        # A clone is passed so the original tensor is untouched for the batched run.
        torch.random.manual_seed(seed)
        single_output = functional(tensor.clone(), *args, **kwargs)
        expected = single_output.repeat([batch_size] + [1] * single_output.dim())

        # Batched path: tile the input first, then run.
        torch.random.manual_seed(seed)
        batched_input = tensor.repeat([batch_size] + [1] * tensor.dim())
        computed = functional(batched_input, *args, **kwargs)

        self.assertEqual(computed, expected, rtol=rtol, atol=atol)

    def assert_batch_consistencies(
            self, functional, tensor, *args, atol=1e-8, rtol=1e-5, seed=42, **kwargs):
        """Run :meth:`assert_batch_consistency` for batch sizes 1 and 3.

        All other arguments are forwarded unchanged.
        """
        for batch_size in (1, 3):
            self.assert_batch_consistency(
                functional, tensor, *args, batch_size=batch_size, atol=atol, rtol=rtol, seed=seed, **kwargs)

    def test_griffinlim(self):
        """Check batch consistency of ``F.griffinlim`` on a random input."""
        n_fft = 400
        ws = 400
        hop = 200
        window = torch.hann_window(ws)
        power = 2
        normalize = False
        momentum = 0.99
        n_iter = 32
        length = 1000
        tensor = torch.rand((1, 201, 6))
        # NOTE(review): the trailing positional ``0`` is presumably ``rand_init``
        # (disabled) - confirm against the ``F.griffinlim`` signature in use.
        self.assert_batch_consistencies(
            F.griffinlim, tensor, window, n_fft, hop, ws, power, normalize, n_iter, momentum, length, 0, atol=5e-5
        )

    def test_detect_pitch_frequency(self):
        """Check batch consistency of ``F.detect_pitch_frequency`` on several audio assets."""
        filenames = [
            'steam-train-whistle-daniel_simon.wav',  # 2ch 44100Hz
            # Files from https://www.mediacollege.com/audio/tone/download/
            '100Hz_44100Hz_16bit_05sec.wav',  # 1ch
            '440Hz_44100Hz_16bit_05sec.wav',  # 1ch
        ]
        for filename in filenames:
            filepath = common_utils.get_asset_path(filename)
            waveform, sample_rate = torchaudio.load(filepath)
            self.assert_batch_consistencies(F.detect_pitch_frequency, waveform, sample_rate)

    def test_istft(self):
        """Check batch consistency of ``F.istft`` on a small hand-written STFT tensor."""
        stft = torch.tensor([
            [[4., 0.], [4., 0.], [4., 0.], [4., 0.], [4., 0.]],
            [[0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.]],
            [[0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.]]
        ])
        self.assert_batch_consistencies(F.istft, stft, n_fft=4, length=4)

    def test_contrast(self):
        """Check batch consistency of ``F.contrast`` on a random waveform."""
        waveform = torch.rand(2, 100) - 0.5
        self.assert_batch_consistencies(F.contrast, waveform, enhancement_amount=80.)

    def test_dcshift(self):
        """Check batch consistency of ``F.dcshift`` on a random waveform."""
        waveform = torch.rand(2, 100) - 0.5
        self.assert_batch_consistencies(F.dcshift, waveform, shift=0.5, limiter_gain=0.05)

    def test_overdrive(self):
        """Check batch consistency of ``F.overdrive`` on a random waveform."""
        waveform = torch.rand(2, 100) - 0.5
        self.assert_batch_consistencies(F.overdrive, waveform, gain=45, colour=30)

    def test_phaser(self):
        """Check batch consistency of ``F.phaser`` on the ``whitenoise.wav`` asset."""
        filepath = common_utils.get_asset_path("whitenoise.wav")
        waveform, sample_rate = torchaudio.load(filepath)
        self.assert_batch_consistencies(F.phaser, waveform, sample_rate)

    def test_flanger(self):
        """Check batch consistency of ``F.flanger`` on a seeded random waveform."""
        torch.random.manual_seed(40)
        waveform = torch.rand(2, 100) - 0.5
        sample_rate = 44100
        self.assert_batch_consistencies(F.flanger, waveform, sample_rate)

    def test_sliding_window_cmn(self):
        """Check batch consistency of ``F.sliding_window_cmn`` for every ``center``/``norm_vars`` combination."""
        waveform = torch.randn(2, 1024) - 0.5
        # Iteration order matches the original explicit calls:
        # (True, True), (True, False), (False, True), (False, False).
        for center in (True, False):
            for norm_vars in (True, False):
                self.assert_batch_consistencies(
                    F.sliding_window_cmn, waveform, center=center, norm_vars=norm_vars)

    def test_vad(self):
        """Check batch consistency of ``F.vad`` on the ``vad-go-mono-32000.wav`` asset."""
        common_utils.set_audio_backend('default')
        filepath = common_utils.get_asset_path("vad-go-mono-32000.wav")
        waveform, sample_rate = torchaudio.load(filepath)
        self.assert_batch_consistencies(F.vad, waveform, sample_rate=sample_rate)
105

106

moto's avatar
moto committed
107
108
109
class TestTransforms(common_utils.TorchaudioTestCase):
    """Test suite for classes defined in `transforms` module"""
    backend = 'default'

    def test_batch_AmplitudeToDB(self):
        """``AmplitudeToDB`` gives the same result on a single input and on a batch of copies."""
        spec = torch.rand((6, 201))

        # Transform the single input, then tile the output into a batch of 3.
        expected = torchaudio.transforms.AmplitudeToDB()(spec).repeat(3, 1, 1)

        # Tile the input into a batch of 3, then transform.
        computed = torchaudio.transforms.AmplitudeToDB()(spec.repeat(3, 1, 1))

        self.assertEqual(computed, expected)

    def test_batch_Resample(self):
        """``Resample`` (default arguments) gives the same result on a single input and on a batch."""
        waveform = torch.randn(2, 2786)

        # Transform the single input, then tile the output into a batch of 3.
        expected = torchaudio.transforms.Resample()(waveform).repeat(3, 1, 1)

        # Tile the input into a batch of 3, then transform.
        computed = torchaudio.transforms.Resample()(waveform.repeat(3, 1, 1))

        self.assertEqual(computed, expected)

    def test_batch_MelScale(self):
        """``MelScale`` (default arguments) gives the same result on a single input and on a batch."""
        specgram = torch.randn(2, 31, 2786)

        # Transform the single input, then tile the output into a batch of 3.
        expected = torchaudio.transforms.MelScale()(specgram).repeat(3, 1, 1, 1)

        # Tile the input into a batch of 3, then transform.
        computed = torchaudio.transforms.MelScale()(specgram.repeat(3, 1, 1, 1))

        self.assertEqual(computed, expected)

    def test_batch_InverseMelScale(self):
        """``InverseMelScale`` gives approximately the same result on a single input and on a batch."""
        n_mels = 32
        n_stft = 5
        mel_spec = torch.randn(2, n_mels, 32) ** 2

        # Transform the single input, then tile the output into a batch of 3.
        expected = torchaudio.transforms.InverseMelScale(n_stft, n_mels)(mel_spec).repeat(3, 1, 1, 1)

        # Tile the input into a batch of 3, then transform.
        computed = torchaudio.transforms.InverseMelScale(n_stft, n_mels)(mel_spec.repeat(3, 1, 1, 1))

        # Because InverseMelScale runs SGD on randomly initialized values so they do not yield
        # exactly same result. For this reason, tolerance is very relaxed here.
        self.assertEqual(computed, expected, atol=1.0, rtol=1e-5)

    def test_batch_compute_deltas(self):
        """``ComputeDeltas`` gives the same result on a single input and on a batch of copies."""
        specgram = torch.randn(2, 31, 2786)

        # Transform the single input, then tile the output into a batch of 3.
        expected = torchaudio.transforms.ComputeDeltas()(specgram).repeat(3, 1, 1, 1)

        # Tile the input into a batch of 3, then transform.
        computed = torchaudio.transforms.ComputeDeltas()(specgram.repeat(3, 1, 1, 1))

        self.assertEqual(computed, expected)

    def test_batch_mulaw(self):
        """``MuLawEncoding`` and ``MuLawDecoding`` give the same result on a single input and on a batch."""
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
        waveform, _ = torchaudio.load(test_filepath)  # (2, 278756), 44100

        # --- Encoding ---
        # Encode the single waveform, then add a batch dimension and tile to 3.
        waveform_encoded = torchaudio.transforms.MuLawEncoding()(waveform)
        expected = waveform_encoded.unsqueeze(0).repeat(3, 1, 1)

        # Add a batch dimension and tile to 3, then encode.
        waveform_batched = waveform.unsqueeze(0).repeat(3, 1, 1)
        computed = torchaudio.transforms.MuLawEncoding()(waveform_batched)

        self.assertEqual(computed, expected)

        # --- Decoding ---
        # Decode the single encoded waveform, then add a batch dimension and tile to 3.
        waveform_decoded = torchaudio.transforms.MuLawDecoding()(waveform_encoded)
        expected = waveform_decoded.unsqueeze(0).repeat(3, 1, 1)

        # Decode the batched encoder output produced above.
        computed = torchaudio.transforms.MuLawDecoding()(computed)

        self.assertEqual(computed, expected)

    def test_batch_spectrogram(self):
        """``Spectrogram`` (default arguments) gives the same result on a single input and on a batch."""
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
        waveform, _ = torchaudio.load(test_filepath)  # (2, 278756), 44100

        # Transform the single input, then tile the output into a batch of 3.
        expected = torchaudio.transforms.Spectrogram()(waveform).repeat(3, 1, 1, 1)

        # Tile the input into a batch of 3, then transform.
        computed = torchaudio.transforms.Spectrogram()(waveform.repeat(3, 1, 1))
        self.assertEqual(computed, expected)

    def test_batch_melspectrogram(self):
        """``MelSpectrogram`` (default arguments) gives the same result on a single input and on a batch."""
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
        waveform, _ = torchaudio.load(test_filepath)  # (2, 278756), 44100

        # Transform the single input, then tile the output into a batch of 3.
        expected = torchaudio.transforms.MelSpectrogram()(waveform).repeat(3, 1, 1, 1)

        # Tile the input into a batch of 3, then transform.
        computed = torchaudio.transforms.MelSpectrogram()(waveform.repeat(3, 1, 1))
        self.assertEqual(computed, expected)

    def test_batch_mfcc(self):
        """``MFCC`` (default arguments) gives the same result on a single input and on a batch, within 1e-4."""
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
        waveform, _ = torchaudio.load(test_filepath)

        # Transform the single input, then tile the output into a batch of 3.
        expected = torchaudio.transforms.MFCC()(waveform).repeat(3, 1, 1, 1)

        # Tile the input into a batch of 3, then transform.
        computed = torchaudio.transforms.MFCC()(waveform.repeat(3, 1, 1))
        self.assertEqual(computed, expected, atol=1e-4, rtol=1e-5)

    def test_batch_TimeStretch(self):
        """``TimeStretch`` gives the same result on a single complex spectrogram and on a batch."""
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
        waveform, _ = torchaudio.load(test_filepath)  # (2, 278756), 44100

        kwargs = {
            'n_fft': 2048,
            'hop_length': 512,
            'win_length': 2048,
            'window': torch.hann_window(2048),
            'center': True,
            'pad_mode': 'reflect',
            'normalized': True,
            'onesided': True,
        }
        rate = 2

        complex_specgrams = torch.stft(waveform, **kwargs)

        # Transform the single input, then tile the output into a batch of 3.
        expected = torchaudio.transforms.TimeStretch(
            fixed_rate=rate,
            n_freq=1025,
            hop_length=512,
        )(complex_specgrams).repeat(3, 1, 1, 1, 1)

        # Tile the input into a batch of 3, then transform.
        computed = torchaudio.transforms.TimeStretch(
            fixed_rate=rate,
            n_freq=1025,
            hop_length=512,
        )(complex_specgrams.repeat(3, 1, 1, 1, 1))

        self.assertEqual(computed, expected, atol=1e-5, rtol=1e-5)

    def test_batch_Fade(self):
        """``Fade`` gives the same result on a single input and on a batch of copies."""
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
        waveform, _ = torchaudio.load(test_filepath)  # (2, 278756), 44100
        fade_in_len = 3000
        fade_out_len = 3000

        # Transform the single input, then tile the output into a batch of 3.
        expected = torchaudio.transforms.Fade(fade_in_len, fade_out_len)(waveform).repeat(3, 1, 1)

        # Tile the input into a batch of 3, then transform.
        computed = torchaudio.transforms.Fade(fade_in_len, fade_out_len)(waveform.repeat(3, 1, 1))
        self.assertEqual(computed, expected)

    def test_batch_Vol(self):
        """``Vol`` with ``gain=1.1`` gives the same result on a single input and on a batch of copies."""
        test_filepath = common_utils.get_asset_path('steam-train-whistle-daniel_simon.wav')
        waveform, _ = torchaudio.load(test_filepath)  # (2, 278756), 44100

        # Transform the single input, then tile the output into a batch of 3.
        expected = torchaudio.transforms.Vol(gain=1.1)(waveform).repeat(3, 1, 1)

        # Tile the input into a batch of 3, then transform.
        computed = torchaudio.transforms.Vol(gain=1.1)(waveform.repeat(3, 1, 1))
        self.assertEqual(computed, expected)
Vincent QB's avatar
Vincent QB committed
289
290
291
292


# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()