Add AudioEffector tutorial (#3226)

Summary: https://output.circle-artifacts.com/output/job/fbfa6d9a-5014-42ac-8e77-c1e9565747e8/artifacts/0/docs/tutorials/effector_tutorial.html Pull Request resolved: https://github.com/pytorch/audio/pull/3226 Reviewed By: nateanl Differential Revision: D45402724 Pulled By: mthrok fbshipit-source-id: bc9d1bc071f6f5062b9cc35d743b4a3016306262

Add AudioEffector tutorial (#3226)
Summary: https://output.circle-artifacts.com/output/job/fbfa6d9a-5014-42ac-8e77-c1e9565747e8/artifacts/0/docs/tutorials/effector_tutorial.html Pull Request resolved: https://github.com/pytorch/audio/pull/3226 Reviewed By: nateanl Differential Revision: D45402724 Pulled By: mthrok fbshipit-source-id: bc9d1bc071f6f5062b9cc35d743b4a3016306262
2ab49e5b · moto · Facebook GitHub Bot · 667c6a9e · 2ab49e5b · 2ab49e5b
Commit 2ab49e5b authored May 09, 2023 by moto Committed by Facebook GitHub Bot May 09, 2023
Hide whitespace changes
Inline Side-by-side

Showing with 379 additions and 0 deletions

docs/source/index.rst docs/source/index.rst +8 -0

examples/tutorials/effector_tutorial.py examples/tutorials/effector_tutorial.py +371 -0

No files found.
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -46,6 +46,7 @@ model implementations and application components.
   tutorials/streamwriter_advanced
   hw_acceleration_tutorial
+   tutorials/effector_tutorial
   tutorials/audio_resampling_tutorial
   tutorials/audio_data_augmentation_tutorial
   tutorials/audio_feature_extractions_tutorial
@@ -183,6 +184,13 @@ Tutorials
   :link: hw_acceleration_tutorial.html
   :tags: I/O,StreamReader,StreamWriter
+.. customcarditem::
+   :header: Apply effects and codecs to waveform
+   :card_description: Learn how to apply effects and codecs to waveform using <code>torchaudio.io.AudioEffector</code>.
+   :image: https://download.pytorch.org/torchaudio/tutorial-assets/thumbnails/effector_tutorial.png
+   :link: tutorials/effector_tutorial.html
+   :tags: Preprocessing
 .. customcarditem::
   :header: Audio resampling with bandlimited sinc interpolation
   :card_description: Learn how to resample audio tensor with <code>torchaudio.functional.resample</code> and <code>torchaudio.transforms.Resample</code>.

--- a/examples/tutorials/effector_tutorial.py
+++ b/examples/tutorials/effector_tutorial.py
+"""
+AudioEffector Usages
+====================
+**Author**: `Moto Hira <moto@meta.com>`__
+This tutorial shows how to use :py:class:`torchaudio.io.AudioEffector` to
+apply various effects and codecs to a waveform tensor.
+"""
+######################################################################
+#
+# .. note::
+#
+#    This tutorial requires FFmpeg libraries (>=4.1, <5).
+#
+#    There are multiple ways to install FFmpeg libraries.
+#    If you are using Anaconda Python distribution,
+#    ``conda install -c anaconda 'ffmpeg<5'`` will install
+#    the required libraries.
+#
+######################################################################
+# Overview
+# --------
+#
+# :py:class:`~torchaudio.io.AudioEffector` combines in-memory encoding,
+# decoding and filtering that are provided by
+# :py:class:`~torchaudio.io.StreamWriter` and
+# :py:class:`~torchaudio.io.StreamReader`.
+#
+# The following figure illustrates the process.
+#
+# .. image:: https://download.pytorch.org/torchaudio/tutorial-assets/AudioEffector.png
+#
+import torch
+import torchaudio
+print(torch.__version__)
+print(torchaudio.__version__)
+######################################################################
+#
+try:
+    from torchaudio.io import AudioEffector, CodecConfig
+except ImportError as err:
+    raise RuntimeError(
+        "This tutorial requires nightly build of TorchAudio. "
+        "Please install the nightly versions of PyTorch and torchaudio. "
+        "https://pytorch.org/get-started/locally/"
+    ) from err
+import matplotlib.pyplot as plt
+from IPython.display import Audio
+######################################################################
+#
+try:
+    # Print the versions of the FFmpeg libraries that torchaudio found.
+    for k, v in torchaudio.utils.ffmpeg_utils.get_versions().items():
+        print(k, v)
+except Exception as err:
+    # Chain the underlying failure explicitly, the same way the import
+    # check for ``AudioEffector`` does, so the cause stays in the traceback.
+    raise RuntimeError(
+        "This tutorial requires FFmpeg libraries (>=4.1, <5). "
+        "Please install FFmpeg."
+    ) from err
+######################################################################
+# Usage
+# -----
+#
+# To use ``AudioEffector``, instantiate it with ``effect`` and
+# ``format``, then either pass the waveform to
+# :py:meth:`~torchaudio.io.AudioEffector.apply` or
+# :py:meth:`~torchaudio.io.AudioEffector.stream` method.
+#
+# .. code:: python
+#
+#    effector = AudioEffector(effect=..., format=...,)
+#
+#    # Apply at once
+#    applied = effector.apply(waveform, sample_rate)
+#
+# The ``apply`` method applies the effect and codec to the entire waveform
+# at once. If the input waveform is long and memory consumption is a
+# concern, use the ``stream`` method to process it chunk by chunk.
+#
+# .. code:: python
+#
+#    # Apply chunk by chunk
+#    for applied_chunk in effector.stream(waveform, sample_rate):
+#        ...
+#
+######################################################################
+# Example
+# -------
+#
+src = torchaudio.utils.download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+waveform, sr = torchaudio.load(src, channels_first=False)
+######################################################################
+# Gallery
+# -------
+#
+def show(effect=None, format=None, *, stereo=False):
+    """Run the sample through ``AudioEffector`` and visualize the result.
+
+    Uses the module-level ``waveform`` and ``sr`` loaded earlier in this
+    tutorial (``waveform`` is loaded with ``channels_first=False``).
+
+    Args:
+        effect: Passed through as ``AudioEffector(effect=...)``.
+        format: Passed through as ``AudioEffector(format=...)``.
+        stereo: When true, the input is duplicated along ``dim=1`` before
+            processing and a taller figure is used.
+
+    Returns:
+        An ``IPython.display.Audio`` object for the processed waveform.
+    """
+    if stereo:
+        source = torch.cat([waveform, waveform], dim=1)
+        height = 2.1
+    else:
+        source = waveform
+        height = 1.2
+    processed = AudioEffector(effect=effect, format=format, pad_end=False).apply(source, int(sr))
+    channels = processed.size(1)
+    # squeeze=False keeps the axes as a 2-D grid, so each row holds one Axes.
+    fig, grid = plt.subplots(channels, 1, squeeze=False, figsize=(6.4, height), sharex=True)
+    for ch, (axis,) in enumerate(grid):
+        axis.specgram(processed[:, ch], Fs=sr)
+    fig.set_tight_layout(True)
+    return Audio(processed.numpy().T, rate=sr)
+######################################################################
+# Original
+# --------
+#
+show(effect=None, format=None)
+######################################################################
+# Effects
+# -------
+#
+######################################################################
+# tempo
+# ~~~~~
+# https://ffmpeg.org/ffmpeg-filters.html#atempo
+show(effect="atempo=0.7")
+######################################################################
+#
+show(effect="atempo=1.8")
+######################################################################
+# highpass
+# ~~~~~~~~
+# https://ffmpeg.org/ffmpeg-filters.html#highpass
+show(effect="highpass=frequency=1500")
+######################################################################
+# lowpass
+# ~~~~~~~
+# https://ffmpeg.org/ffmpeg-filters.html#lowpass
+show(effect="lowpass=frequency=1000")
+######################################################################
+# allpass
+# ~~~~~~~~
+# https://ffmpeg.org/ffmpeg-filters.html#allpass
+show(effect="allpass")
+######################################################################
+# bandpass
+# ~~~~~~~~
+# https://ffmpeg.org/ffmpeg-filters.html#bandpass
+show(effect="bandpass=frequency=3000")
+######################################################################
+# bandreject
+# ~~~~~~~~~~
+# https://ffmpeg.org/ffmpeg-filters.html#bandreject
+show(effect="bandreject=frequency=3000")
+######################################################################
+# echo
+# ~~~~
+# https://ffmpeg.org/ffmpeg-filters.html#aecho
+show(effect="aecho=in_gain=0.8:out_gain=0.88:delays=6:decays=0.4")
+######################################################################
+#
+show(effect="aecho=in_gain=0.8:out_gain=0.88:delays=60:decays=0.4")
+######################################################################
+#
+show(effect="aecho=in_gain=0.8:out_gain=0.9:delays=1000:decays=0.3")
+######################################################################
+# chorus
+# ~~~~~~
+# https://ffmpeg.org/ffmpeg-filters.html#chorus
+show(effect=("chorus=0.5:0.9:50|60|40:0.4|0.32|0.3:0.25|0.4|0.3:2|2.3|1.3"))
+######################################################################
+# fft filter
+# ~~~~~~~~~~
+# https://ffmpeg.org/ffmpeg-filters.html#afftfilt
+show(effect=(
+    "afftfilt="
+    "real='re * (1-clip(b * (b/nb), 0, 1))':"
+    "imag='im * (1-clip(b * (b/nb), 0, 1))'"))
+######################################################################
+#
+show(effect=(
+    "afftfilt="
+    "real='hypot(re,im) * sin(0)':"
+    "imag='hypot(re,im) * cos(0)':"
+    "win_size=512:"
+    "overlap=0.75"))
+######################################################################
+#
+show(effect=(
+    "afftfilt="
+    "real='hypot(re,im) * cos(2 * 3.14 * (random(0) * 2-1))':"
+    "imag='hypot(re,im) * sin(2 * 3.14 * (random(1) * 2-1))':"
+    "win_size=128:"
+    "overlap=0.8"))
+######################################################################
+# vibrato
+# ~~~~~~~
+# https://ffmpeg.org/ffmpeg-filters.html#vibrato
+show(effect=("vibrato=f=10:d=0.8"))
+######################################################################
+# tremolo
+# ~~~~~~~
+# https://ffmpeg.org/ffmpeg-filters.html#tremolo
+show(effect=("tremolo=f=8:d=0.8"))
+######################################################################
+# crystalizer
+# ~~~~~~~~~~~
+# https://ffmpeg.org/ffmpeg-filters.html#crystalizer
+show(effect=("crystalizer"))
+######################################################################
+# flanger
+# ~~~~~~~
+# https://ffmpeg.org/ffmpeg-filters.html#flanger
+show(effect=("flanger"))
+######################################################################
+# phaser
+# ~~~~~~
+# https://ffmpeg.org/ffmpeg-filters.html#aphaser
+show(effect=("aphaser"))
+######################################################################
+# pulsator
+# ~~~~~~~~
+# https://ffmpeg.org/ffmpeg-filters.html#apulsator
+show(effect=("apulsator"), stereo=True)
+######################################################################
+# haas
+# ~~~~
+# https://ffmpeg.org/ffmpeg-filters.html#haas
+show(effect=("haas"))
+######################################################################
+# Codecs
+# ------
+#
+def show_multi(configs):
+    """Apply several ``AudioEffector`` configurations and plot them stacked.
+
+    Uses the module-level ``waveform`` and ``sr`` loaded earlier in this
+    tutorial.
+
+    Args:
+        configs: Sequence of dicts; each one is expanded as keyword
+            arguments to ``AudioEffector``.
+
+    Returns:
+        A list of ``IPython.display.Audio`` objects, one per config, in
+        the same order as ``configs``.
+    """
+    results = [AudioEffector(**config).apply(waveform, int(sr)) for config in configs]
+    num_configs = len(configs)
+    figsize = (6.4, 0.3 + num_configs * 0.9)
+    # squeeze=False keeps ``axes`` a 2-D array even when there is only one
+    # config; otherwise a single Axes object is returned and cannot be zipped.
+    f, axes = plt.subplots(num_configs, 1, squeeze=False, figsize=figsize, sharex=True)
+    for result, ax in zip(results, axes[:, 0]):
+        # Only the first channel of each result is plotted.
+        ax.specgram(result[:, 0], Fs=sr)
+    f.set_tight_layout(True)
+    return [Audio(r.numpy().T, rate=sr) for r in results]
+######################################################################
+# ogg
+# ~~~
+#
+results = show_multi([
+    {"format": "ogg"},
+    {"format": "ogg", "encoder": "vorbis"},
+    {"format": "ogg", "encoder": "opus"},
+])
+######################################################################
+# ogg - default encoder (flac)
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+#
+results[0]
+######################################################################
+# ogg - vorbis
+# ^^^^^^^^^^^^
+#
+results[1]
+######################################################################
+# ogg - opus
+# ^^^^^^^^^^
+#
+results[2]
+######################################################################
+# mp3
+# ~~~
+# https://trac.ffmpeg.org/wiki/Encode/MP3
+results = show_multi([
+    {"format": "mp3"},
+    {"format": "mp3", "codec_config": CodecConfig(compression_level=1)},
+    {"format": "mp3", "codec_config": CodecConfig(compression_level=9)},
+    {"format": "mp3", "codec_config": CodecConfig(bit_rate=192_000)},
+    {"format": "mp3", "codec_config": CodecConfig(bit_rate=8_000)},
+    {"format": "mp3", "codec_config": CodecConfig(qscale=9)},
+    {"format": "mp3", "codec_config": CodecConfig(qscale=1)},
+])
+######################################################################
+# default
+# ^^^^^^^
+results[0]
+######################################################################
+# compression_level=1
+# ^^^^^^^^^^^^^^^^^^^
+results[1]
+######################################################################
+# compression_level=9
+# ^^^^^^^^^^^^^^^^^^^
+results[2]
+######################################################################
+# bit_rate=192k
+# ^^^^^^^^^^^^^
+results[3]
+######################################################################
+# bit_rate=8k
+# ^^^^^^^^^^^^^
+results[4]
+######################################################################
+# qscale=9
+# ^^^^^^^^
+results[5]
+######################################################################
+# qscale=1
+# ^^^^^^^^
+results[6]
+######################################################################
+#
+# Tag: :obj:`torchaudio.io`