effector_tutorial.py 9.4 KB
Newer Older
moto's avatar
moto committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
"""
AudioEffector Usages
====================

**Author**: `Moto Hira <moto@meta.com>`__

This tutorial shows how to use :py:class:`torchaudio.io.AudioEffector` to
apply various effects and codecs to a waveform tensor.

"""

######################################################################
#
# .. note::
#
#    This tutorial requires FFmpeg libraries.
#    Please refer to :ref:`FFmpeg dependency <ffmpeg_dependency>` for
#    details.
moto's avatar
moto committed
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#

######################################################################
# Overview
# --------
#
# :py:class:`~torchaudio.io.AudioEffector` combines in-memory encoding,
# decoding and filtering that are provided by
# :py:class:`~torchaudio.io.StreamWriter` and
# :py:class:`~torchaudio.io.StreamReader`.
#
# The following figure illustrates the process.
#
# .. image:: https://download.pytorch.org/torchaudio/tutorial-assets/AudioEffector.png
#
import torch
import torchaudio

print(torch.__version__)
print(torchaudio.__version__)

######################################################################
#
42
from torchaudio.io import AudioEffector, CodecConfig
moto's avatar
moto committed
43
44
45
46
47
48

import matplotlib.pyplot as plt
from IPython.display import Audio

######################################################################
#
49
50
# Print every entry reported by ``ffmpeg_utils.get_versions()``, one per line.
versions = torchaudio.utils.ffmpeg_utils.get_versions()
for name, version in versions.items():
    print(name, version)
moto's avatar
moto committed
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93

######################################################################
# Usage
# -----
#
# To use ``AudioEffector``, instantiate it with ``effect`` and
# ``format``, then either pass the waveform to
# :py:meth:`~torchaudio.io.AudioEffector.apply` or
# :py:meth:`~torchaudio.io.AudioEffector.stream` method.
#
# .. code:: python
#
#    effector = AudioEffector(effect=..., format=...,)
#
#    # Apply at once
#    applied = effector.apply(waveform, sample_rate)
#
# The ``apply`` method applies the effect and codec to the entire waveform
# at once. So if the input waveform is long, and memory consumption is an
# issue, one can use the ``stream`` method to process it chunk by chunk.
#
# .. code:: python
#
#    # Apply chunk by chunk
#    for applied_chunk in effector.stream(waveform, sample_rate):
#        ...
#

######################################################################
# Example
# -------
#

# Download the sample clip and decode it. ``channels_first=False`` asks for
# the channel dimension to come second, which is why the helpers in this
# file index channels on dim 1.
src = torchaudio.utils.download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
waveform, sr = torchaudio.load(src, channels_first=False)


######################################################################
# Gallery
# -------
#


moto's avatar
moto committed
94
def show(effect, *, stereo=False):
    """Apply ``effect`` to the sample waveform, plot it, and return a player.

    Uses the module-level ``waveform`` and ``sr`` loaded earlier in this file.

    Args:
        effect: Value forwarded to ``AudioEffector`` as its ``effect``
            argument (``None`` is forwarded unchanged).
        stereo: If ``True``, ``waveform`` is concatenated with itself along
            dim 1 before the effect is applied, and a taller figure is used.

    Returns:
        An ``IPython.display.Audio`` object built from the processed waveform.
    """
    wf = torch.cat([waveform] * 2, dim=1) if stereo else waveform
    figsize = (6.4, 2.1 if stereo else 1.2)

    effector = AudioEffector(effect=effect, pad_end=False)
    result = effector.apply(wf, int(sr))

    # One spectrogram row per column of ``result``. ``squeeze=False`` keeps
    # ``ax`` two-dimensional, so ``ax[i][0]`` also works for a single row.
    num_channels = result.size(1)
    f, ax = plt.subplots(num_channels, 1, squeeze=False, figsize=figsize, sharex=True)
    for i in range(num_channels):
        ax[i][0].specgram(result[:, i], Fs=sr)
    f.set_tight_layout(True)

    # ``Audio`` receives the transposed array, i.e. dim 1 of ``result`` first.
    return Audio(result.numpy().T, rate=sr)


######################################################################
# Original
# --------
#

moto's avatar
moto committed
115
show(effect=None)
moto's avatar
moto committed
116
117
118
119
120
121
122
123
124
125

######################################################################
# Effects
# -------
#

######################################################################
# tempo
# ~~~~~
# https://ffmpeg.org/ffmpeg-filters.html#atempo
moto's avatar
moto committed
126
show("atempo=0.7")
moto's avatar
moto committed
127
128
129

######################################################################
#
moto's avatar
moto committed
130
show("atempo=1.8")
moto's avatar
moto committed
131
132
133
134
135

######################################################################
# highpass
# ~~~~~~~~
# https://ffmpeg.org/ffmpeg-filters.html#highpass
moto's avatar
moto committed
136
show("highpass=frequency=1500")
moto's avatar
moto committed
137
138
139
140
141

######################################################################
# lowpass
# ~~~~~~~
# https://ffmpeg.org/ffmpeg-filters.html#lowpass
moto's avatar
moto committed
142
show("lowpass=frequency=1000")
moto's avatar
moto committed
143
144
145
146
147

######################################################################
# allpass
# ~~~~~~~
# https://ffmpeg.org/ffmpeg-filters.html#allpass
moto's avatar
moto committed
148
show("allpass")
moto's avatar
moto committed
149
150
151
152
153

######################################################################
# bandpass
# ~~~~~~~~
# https://ffmpeg.org/ffmpeg-filters.html#bandpass
moto's avatar
moto committed
154
show("bandpass=frequency=3000")
moto's avatar
moto committed
155
156
157
158
159

######################################################################
# bandreject
# ~~~~~~~~~~
# https://ffmpeg.org/ffmpeg-filters.html#bandreject
moto's avatar
moto committed
160
show("bandreject=frequency=3000")
moto's avatar
moto committed
161
162
163
164
165

######################################################################
# echo
# ~~~~
# https://ffmpeg.org/ffmpeg-filters.html#aecho
moto's avatar
moto committed
166
show("aecho=in_gain=0.8:out_gain=0.88:delays=6:decays=0.4")
moto's avatar
moto committed
167
168
169

######################################################################
#
moto's avatar
moto committed
170
show("aecho=in_gain=0.8:out_gain=0.88:delays=60:decays=0.4")
moto's avatar
moto committed
171
172
173

######################################################################
#
moto's avatar
moto committed
174
show("aecho=in_gain=0.8:out_gain=0.9:delays=1000:decays=0.3")
moto's avatar
moto committed
175
176
177
178
179

######################################################################
# chorus
# ~~~~~~
# https://ffmpeg.org/ffmpeg-filters.html#chorus
moto's avatar
moto committed
180
show("chorus=0.5:0.9:50|60|40:0.4|0.32|0.3:0.25|0.4|0.3:2|2.3|1.3")
moto's avatar
moto committed
181
182
183
184
185

######################################################################
# fft filter
# ~~~~~~~~~~
# https://ffmpeg.org/ffmpeg-filters.html#afftfilt
moto's avatar
moto committed
186
187
188

# fmt: off
# The adjacent string literals are concatenated into a single ``afftfilt``
# filter description before being passed to ``show``.
show(
    "afftfilt="
    "real='re * (1-clip(b * (b/nb), 0, 1))':"
    "imag='im * (1-clip(b * (b/nb), 0, 1))'"
)
moto's avatar
moto committed
193
194
195

######################################################################
#
moto's avatar
moto committed
196
197

# The adjacent string literals are concatenated into a single ``afftfilt``
# filter description; options are separated by ``:``.
show(
    "afftfilt="
    "real='hypot(re,im) * sin(0)':"
    "imag='hypot(re,im) * cos(0)':"
    "win_size=512:"
    "overlap=0.75"
)
moto's avatar
moto committed
204
205
206

######################################################################
#
moto's avatar
moto committed
207
208

# The adjacent string literals are concatenated into a single ``afftfilt``
# filter description; options are separated by ``:``.
show(
    "afftfilt="
    "real='hypot(re,im) * cos(2 * 3.14 * (random(0) * 2-1))':"
    "imag='hypot(re,im) * sin(2 * 3.14 * (random(1) * 2-1))':"
    "win_size=128:"
    "overlap=0.8"
)
# fmt: on
moto's avatar
moto committed
216
217
218
219
220

######################################################################
# vibrato
# ~~~~~~~
# https://ffmpeg.org/ffmpeg-filters.html#vibrato
moto's avatar
moto committed
221
show("vibrato=f=10:d=0.8")
moto's avatar
moto committed
222
223
224
225
226

######################################################################
# tremolo
# ~~~~~~~
# https://ffmpeg.org/ffmpeg-filters.html#tremolo
moto's avatar
moto committed
227
show("tremolo=f=8:d=0.8")
moto's avatar
moto committed
228
229
230
231
232

######################################################################
# crystalizer
# ~~~~~~~~~~~
# https://ffmpeg.org/ffmpeg-filters.html#crystalizer
moto's avatar
moto committed
233
show("crystalizer")
moto's avatar
moto committed
234
235
236
237
238

######################################################################
# flanger
# ~~~~~~~
# https://ffmpeg.org/ffmpeg-filters.html#flanger
moto's avatar
moto committed
239
show("flanger")
moto's avatar
moto committed
240
241
242
243
244

######################################################################
# phaser
# ~~~~~~
# https://ffmpeg.org/ffmpeg-filters.html#aphaser
moto's avatar
moto committed
245
show("aphaser")
moto's avatar
moto committed
246
247
248
249
250

######################################################################
# pulsator
# ~~~~~~~~
# https://ffmpeg.org/ffmpeg-filters.html#apulsator
moto's avatar
moto committed
251
show("apulsator", stereo=True)
moto's avatar
moto committed
252
253
254
255
256

######################################################################
# haas
# ~~~~
# https://ffmpeg.org/ffmpeg-filters.html#haas
moto's avatar
moto committed
257
show("haas")
moto's avatar
moto committed
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285

######################################################################
# Codecs
# ------
#


def show_multi(configs):
    """Apply one ``AudioEffector`` per config to the sample waveform and plot each result.

    Uses the module-level ``waveform`` and ``sr`` loaded earlier in this file.

    Args:
        configs: Sequence of dicts. Each dict is expanded as keyword
            arguments to ``AudioEffector``.

    Returns:
        A list of ``IPython.display.Audio`` objects, one per entry of
        ``configs`` and in the same order.
    """
    results = [AudioEffector(**config).apply(waveform, int(sr)) for config in configs]

    num_configs = len(configs)
    figsize = (6.4, 0.3 + num_configs * 0.9)
    # ``squeeze=False`` always yields a 2-D array of Axes. Without it a single
    # config returns a bare Axes object, which cannot be iterated by ``zip``.
    f, axes = plt.subplots(num_configs, 1, squeeze=False, figsize=figsize, sharex=True)
    for result, ax in zip(results, axes[:, 0]):
        # Only the first column of each result is plotted.
        ax.specgram(result[:, 0], Fs=sr)
    f.set_tight_layout(True)

    return [Audio(r.numpy().T, rate=sr) for r in results]


######################################################################
# ogg
# ~~~
#

moto's avatar
moto committed
286
287
288
289
290
291
292
# Each dict is expanded into ``AudioEffector(**config)`` by ``show_multi``.
# The returned list keeps this order and is indexed in the cells below.
results = show_multi(
    [
        {"format": "ogg"},  # no ``encoder`` key given
        {"format": "ogg", "encoder": "vorbis"},
        {"format": "ogg", "encoder": "opus"},
    ]
)
moto's avatar
moto committed
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316

######################################################################
# ogg - default encoder (flac)
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
results[0]

######################################################################
# ogg - vorbis
# ^^^^^^^^^^^^
#
results[1]

######################################################################
# ogg - opus
# ^^^^^^^^^^
#
results[2]

######################################################################
# mp3
# ~~~
# https://trac.ffmpeg.org/wiki/Encode/MP3

moto's avatar
moto committed
317
318
319
320
321
322
323
324
325
326
327
# Each dict is expanded into ``AudioEffector(**config)`` by ``show_multi``.
# The returned list keeps this order and is indexed in the cells below.
results = show_multi(
    [
        {"format": "mp3"},  # no ``codec_config`` key given
        {"format": "mp3", "codec_config": CodecConfig(compression_level=1)},
        {"format": "mp3", "codec_config": CodecConfig(compression_level=9)},
        {"format": "mp3", "codec_config": CodecConfig(bit_rate=192_000)},
        {"format": "mp3", "codec_config": CodecConfig(bit_rate=8_000)},
        {"format": "mp3", "codec_config": CodecConfig(qscale=9)},
        {"format": "mp3", "codec_config": CodecConfig(qscale=1)},
    ]
)
moto's avatar
moto committed
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366

######################################################################
# default
# ^^^^^^^
results[0]

######################################################################
# compression_level=1
# ^^^^^^^^^^^^^^^^^^^
results[1]

######################################################################
# compression_level=9
# ^^^^^^^^^^^^^^^^^^^
results[2]

######################################################################
# bit_rate=192k
# ^^^^^^^^^^^^^
results[3]

######################################################################
# bit_rate=8k
# ^^^^^^^^^^^
results[4]

######################################################################
# qscale=9
# ^^^^^^^^
results[5]

######################################################################
# qscale=1
# ^^^^^^^^
results[6]

######################################################################
#
# Tag: :obj:`torchaudio.io`