Split Streaming API tutorials into two (#2446)

Summary: The Streaming API tutorial has gotten long, so this commit split it into two. Pull Request resolved: https://github.com/pytorch/audio/pull/2446 Reviewed By: hwangjeff Differential Revision: D36987513 Pulled By: mthrok fbshipit-source-id: 13e3aad74c0d0e654c39c0eeceffca1a00b0dac4

Split Streaming API tutorials into two (#2446)
Summary: The Streaming API tutorial has gotten long, so this commit split it into two. Pull Request resolved: https://github.com/pytorch/audio/pull/2446 Reviewed By: hwangjeff Differential Revision: D36987513 Pulled By: mthrok fbshipit-source-id: 13e3aad74c0d0e654c39c0eeceffca1a00b0dac4
2d846263 · moto · Facebook GitHub Bot · 10d1bd89 · 2d846263 · 2d846263
Commit 2d846263 authored Jun 07, 2022 by moto Committed by Facebook GitHub Bot Jun 07, 2022
3 changed files
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -72,12 +72,13 @@ Getting Started
   :caption: Getting Started

   tutorials/audio_io_tutorial
+   tutorials/streaming_api_tutorial
+   tutorials/streaming_api2_tutorial
   tutorials/audio_resampling_tutorial
   tutorials/audio_data_augmentation_tutorial
   tutorials/audio_feature_extractions_tutorial
   tutorials/audio_feature_augmentation_tutorial
   tutorials/audio_datasets_tutorial
-   tutorials/streaming_api_tutorial

 Advanced Usages
 ---------------

--- a/examples/tutorials/streaming_api2_tutorial.py
+++ b/examples/tutorials/streaming_api2_tutorial.py
+"""
+Media Stream API - Pt. 2
+========================
+
+This tutorial is the continuation of
+`Media Stream API - Pt. 1 <./streaming_api_tutorial.html>`__.
+
+This shows how to use `StreamReader` for
+
+- Device inputs, such as microphone, webcam and screen recording
+- Generating synthetic audio / video
+- Applying preprocessing with custom filter expressions
+"""
+
+import torch
+import torchaudio
+
+print(torch.__version__)
+print(torchaudio.__version__)
+
+######################################################################
+#
+
+try:
+    from torchaudio.io import StreamReader
+except ModuleNotFoundError:
+    try:
+        import google.colab
+
+        print(
+            """
+            To enable running this notebook in Google Colab, install nightly
+            torch and torchaudio builds and the requisite third party libraries by
+            adding the following code block to the top of the notebook before running it:
+
+            !pip3 uninstall -y torch torchvision torchaudio
+            !pip3 install --pre torch torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+            !add-apt-repository -y ppa:savoury1/ffmpeg4
+            !apt-get -qq install -y ffmpeg
+            """
+        )
+    except ModuleNotFoundError:
+        pass
+    raise
+
+import IPython
+import matplotlib.pyplot as plt
+
+base_url = "https://download.pytorch.org/torchaudio/tutorial-assets"
+AUDIO_URL = f"{base_url}/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav"
+VIDEO_URL = f"{base_url}/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4.mp4"
+
+######################################################################
+# Audio / Video device input
+# --------------------------
+#
+# .. seealso::
+#
+#    - `Accelerated Video Decoding with NVDEC <../hw_acceleration_tutorial.html>`__.
+#    - `Online ASR with Emformer RNN-T <./online_asr_tutorial.html>`__.
+#    - `Device ASR with Emformer RNN-T <./device_asr.html>`__.
+#
+# Given that the system has proper media devices and libavdevice is
+# configured to use the devices, the streaming API can
+# pull media streams from these devices.
+#
+# To do this, we pass additional parameters ``format`` and ``option``
+# to the constructor. ``format`` specifies the device component and
+# ``option`` dictionary is specific to the specified component.
+#
+# The exact arguments to be passed depend on the system configuration.
+# Please refer to https://ffmpeg.org/ffmpeg-devices.html for details.
+#
+# The following example illustrates how one can do this on MacBook Pro.
+#
+# First, we need to check the available devices.
+#
+# .. code::
+#
+#    $ ffmpeg -f avfoundation -list_devices true -i ""
+#    [AVFoundation indev @ 0x143f04e50] AVFoundation video devices:
+#    [AVFoundation indev @ 0x143f04e50] [0] FaceTime HD Camera
+#    [AVFoundation indev @ 0x143f04e50] [1] Capture screen 0
+#    [AVFoundation indev @ 0x143f04e50] AVFoundation audio devices:
+#    [AVFoundation indev @ 0x143f04e50] [0] MacBook Pro Microphone
+#
+# We use `FaceTime HD Camera` as video device (index 0) and
+# `MacBook Pro Microphone` as audio device (index 0).
+#
+# If we do not pass any ``option``, the device uses its default
+# configuration. The decoder might not support the configuration.
+#
+# .. code::
+#
+#    >>> StreamReader(
+#    ...     src="0:0",  # The first 0 means `FaceTime HD Camera`, and
+#    ...                 # the second 0 indicates `MacBook Pro Microphone`.
+#    ...     format="avfoundation",
+#    ... )
+#    [avfoundation @ 0x125d4fe00] Selected framerate (29.970030) is not supported by the device.
+#    [avfoundation @ 0x125d4fe00] Supported modes:
+#    [avfoundation @ 0x125d4fe00]   1280x720@[1.000000 30.000000]fps
+#    [avfoundation @ 0x125d4fe00]   640x480@[1.000000 30.000000]fps
+#    Traceback (most recent call last):
+#      File "<stdin>", line 1, in <module>
+#      ...
+#    RuntimeError: Failed to open the input: 0:0
+#
+# By providing ``option``, we can change the format that the device
+# streams to a format supported by the decoder.
+#
+# .. code::
+#
+#    >>> streamer = StreamReader(
+#    ...     src="0:0",
+#    ...     format="avfoundation",
+#    ...     option={"framerate": "30", "pixel_format": "bgr0"},
+#    ... )
+#    >>> for i in range(streamer.num_src_streams):
+#    ...     print(streamer.get_src_stream_info(i))
+#    SourceVideoStream(media_type='video', codec='rawvideo', codec_long_name='raw video', format='bgr0', bit_rate=0, width=640, height=480, frame_rate=30.0)
+#    SourceAudioStream(media_type='audio', codec='pcm_f32le', codec_long_name='PCM 32-bit floating point little-endian', format='flt', bit_rate=3072000, sample_rate=48000.0, num_channels=2)
+#
+
+######################################################################
+# Synthetic source streams
+# ------------------------
+#
+# As a part of device integration, ffmpeg provides a "virtual device"
+# interface. This interface provides synthetic audio / video data
+# generation using libavfilter.
+#
+# To use this, we set ``format=lavfi`` and provide a filter description
+# to ``src``.
+#
+# The details of the filter description can be found at
+# https://ffmpeg.org/ffmpeg-filters.html
+#
+
+######################################################################
+# Audio Examples
+# ~~~~~~~~~~~~~~
+#
+
+######################################################################
+# Sine wave
+# ^^^^^^^^^
+# https://ffmpeg.org/ffmpeg-filters.html#sine
+#
+# .. code::
+#
+#    StreamReader(src="sine=sample_rate=8000:frequency=360", format="lavfi")
+#
+# .. raw:: html
+#
+#    <audio controls>
+#        <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/sine.wav">
+#    </audio>
+#    <img
+#     src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/sine.png"
+#     class="sphx-glr-single-img" style="width:80%">
+#
+
+######################################################################
+# Signal with arbitrary expression
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+#
+# https://ffmpeg.org/ffmpeg-filters.html#aevalsrc
+#
+# .. code::
+#
+#    # 5 Hz binaural beats on a 360 Hz carrier
+#    StreamReader(
+#        src=(
+#            'aevalsrc='
+#            'sample_rate=8000:'
+#            'exprs=0.1*sin(2*PI*(360-5/2)*t)|0.1*sin(2*PI*(360+5/2)*t)'
+#        ),
+#        format='lavfi',
+#     )
+#
+# .. raw:: html
+#
+#    <audio controls>
+#        <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/aevalsrc.wav">
+#    </audio>
+#    <img
+#     src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/aevalsrc.png"
+#     class="sphx-glr-single-img" style="width:80%">
+#
+
+######################################################################
+# Noise
+# ^^^^^
+# https://ffmpeg.org/ffmpeg-filters.html#anoisesrc
+#
+# .. code::
+#
+#    StreamReader(src="anoisesrc=color=pink:sample_rate=8000:amplitude=0.5", format="lavfi")
+#
+# .. raw:: html
+#
+#    <audio controls>
+#        <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/anoisesrc.wav">
+#    </audio>
+#    <img
+#     src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/anoisesrc.png"
+#     class="sphx-glr-single-img" style="width:80%">
+#
+
+######################################################################
+# Video Examples
+# ~~~~~~~~~~~~~~
+#
+
+######################################################################
+# Cellular automaton
+# ^^^^^^^^^^^^^^^^^^
+# https://ffmpeg.org/ffmpeg-filters.html#cellauto
+#
+# .. code::
+#
+#    StreamReader(src="cellauto", format="lavfi")
+#
+# .. raw:: html
+#
+#    <video controls autoplay loop muted>
+#        <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/cellauto.mp4">
+#    </video>
+#
+
+######################################################################
+# Mandelbrot
+# ^^^^^^^^^^
+# https://ffmpeg.org/ffmpeg-filters.html#mandelbrot
+#
+# .. code::
+#
+#    StreamReader(src="mandelbrot", format="lavfi")
+#
+# .. raw:: html
+#
+#    <video controls autoplay loop muted>
+#        <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/mandelbrot.mp4">
+#    </video>
+#
+
+######################################################################
+# MPlayer Test patterns
+# ^^^^^^^^^^^^^^^^^^^^^
+# https://ffmpeg.org/ffmpeg-filters.html#mptestsrc
+#
+# .. code::
+#
+#    StreamReader(src="mptestsrc", format="lavfi")
+#
+# .. raw:: html
+#
+#    <video controls autoplay loop muted width=192 height=192>
+#        <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/mptestsrc.mp4">
+#    </video>
+#
+
+######################################################################
+# John Conway's life game
+# ^^^^^^^^^^^^^^^^^^^^^^^
+# https://ffmpeg.org/ffmpeg-filters.html#life
+#
+# .. code::
+#
+#    StreamReader(src="life", format="lavfi")
+#
+# .. raw:: html
+#
+#    <video controls autoplay loop muted>
+#        <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/life.mp4">
+#    </video>
+#
+
+######################################################################
+# Sierpinski carpet/triangle fractal
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+# https://ffmpeg.org/ffmpeg-filters.html#sierpinski
+#
+# .. code::
+#
+#    StreamReader(src="sierpinski", format="lavfi")
+#
+# .. raw:: html
+#
+#    <video controls autoplay loop muted>
+#        <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/sierpinski.mp4">
+#    </video>
+#
+
+######################################################################
+# Custom filters
+# --------------
+#
+# When defining an output stream, you can use
+# :py:meth:`~torchaudio.io.StreamReader.add_audio_stream` and
+# :py:meth:`~torchaudio.io.StreamReader.add_video_stream` methods.
+#
+# These methods take a ``filter_desc`` argument, which is a string
+# formatted according to ffmpeg's
+# `filter expression <https://ffmpeg.org/ffmpeg-filters.html>`_.
+#
+# The difference between ``add_basic_(audio|video)_stream`` and
+# ``add_(audio|video)_stream`` is that ``add_basic_(audio|video)_stream``
+# constructs the filter expression and passes it to the same underlying
+# implementation. Everything ``add_basic_(audio|video)_stream`` does can be
+# achieved with ``add_(audio|video)_stream``.
+#
+# .. note::
+#
+#    - When applying custom filters, the client code must convert
+#      the audio/video stream to one of the formats that torchaudio
+#      can convert to tensor format.
+#      This can be achieved, for example, by applying
+#      ``format=pix_fmts=rgb24`` to video stream and
+#      ``aformat=sample_fmts=fltp`` to audio stream.
+#    - Each output stream has a separate filter graph. Therefore, it is
+#      not possible to use different input/output streams for a
+#      filter expression. However, it is possible to split one input
+#      stream into multiple streams and merge them later.
+#
+
+######################################################################
+# Audio Examples
+# ~~~~~~~~~~~~~~
+#
+#
+
+# fmt: off
+descs = [
+    # No filtering
+    "anull",
+    # Apply a highpass filter then a lowpass filter
+    "highpass=f=200,lowpass=f=1000",
+    # Manipulate spectrogram: keep the magnitude, fix the phase (robot voice)
+    (
+        "afftfilt="
+        "real='hypot(re,im)*sin(0)':"
+        "imag='hypot(re,im)*cos(0)':"
+        "win_size=512:"
+        "overlap=0.75"
+    ),
+    # Manipulate spectrogram: keep the magnitude, randomize the phase (whisper voice)
+    (
+        "afftfilt="
+        "real='hypot(re,im)*cos((random(0)*2-1)*2*3.14)':"
+        "imag='hypot(re,im)*sin((random(1)*2-1)*2*3.14)':"
+        "win_size=128:"
+        "overlap=0.8"
+    ),
+]
+# fmt: on
+
+######################################################################
+#
+
+sample_rate = 8000
+
+streamer = StreamReader(AUDIO_URL)
+for desc in descs:
+    streamer.add_audio_stream(
+        frames_per_chunk=40000,
+        filter_desc=f"aresample={sample_rate},{desc},aformat=sample_fmts=fltp",
+    )
+
+chunks = next(streamer.stream())
+
+
+def _display(i):
+    print("filter_desc:", streamer.get_out_stream_info(i).filter_description)
+    _, axs = plt.subplots(2, 1)
+    waveform = chunks[i][:, 0]
+    axs[0].plot(waveform)
+    axs[0].grid(True)
+    axs[0].set_ylim([-1, 1])
+    plt.setp(axs[0].get_xticklabels(), visible=False)
+    axs[1].specgram(waveform, Fs=sample_rate)
+    return IPython.display.Audio(chunks[i].T, rate=sample_rate)
+
+
+######################################################################
+# Original
+# ^^^^^^^^
+#
+
+_display(0)
+
+######################################################################
+# Highpass / lowpass filter
+# ^^^^^^^^^^^^^^^^^^^^^^^^^
+#
+
+_display(1)
+
+######################################################################
+# FFT filter - Robot 🤖
+# ^^^^^^^^^^^^^^^^^^^^^
+#
+
+_display(2)
+
+######################################################################
+# FFT filter - Whisper
+# ^^^^^^^^^^^^^^^^^^^^
+#
+
+_display(3)
+
+######################################################################
+# Video Examples
+# ~~~~~~~~~~~~~~
+#
+
+# fmt: off
+descs = [
+    # No effect
+    "null",
+    # Split the input stream and apply horizontal flip to the right half.
+    (
+        "split [main][tmp];"
+        "[tmp] crop=iw/2:ih:0:0, hflip [flip];"
+        "[main][flip] overlay=W/2:0"
+    ),
+    # Edge detection
+    "edgedetect=mode=canny",
+    # Rotate image by a random angle and fill the background with brown
+    "rotate=angle=-random(1)*PI:fillcolor=brown",
+    # Manipulate pixel values based on the coordinate
+    "geq=r='X/W*r(X,Y)':g='(1-X/W)*g(X,Y)':b='(H-Y)/H*b(X,Y)'"
+]
+# fmt: on
+
+######################################################################
+#
+
+streamer = StreamReader(VIDEO_URL)
+for desc in descs:
+    streamer.add_video_stream(
+        frames_per_chunk=30,
+        filter_desc=f"fps=10,{desc},format=pix_fmts=rgb24",
+    )
+
+streamer.seek(12)
+
+chunks = next(streamer.stream())
+
+
+def _display(i):
+    print("filter_desc:", streamer.get_out_stream_info(i).filter_description)
+    _, axs = plt.subplots(1, 3, figsize=(8, 1.9))
+    chunk = chunks[i]
+    for j in range(3):
+        axs[j].imshow(chunk[10 * j + 1].permute(1, 2, 0))
+        axs[j].set_axis_off()
+    plt.tight_layout()
+    plt.show(block=False)
+
+
+######################################################################
+# Original
+# ^^^^^^^^
+
+_display(0)
+
+######################################################################
+# Mirror
+# ^^^^^^
+
+_display(1)
+
+######################################################################
+# Edge detection
+# ^^^^^^^^^^^^^^^
+
+_display(2)
+
+######################################################################
+# Random rotation
+# ^^^^^^^^^^^^^^^
+
+_display(3)
+
+######################################################################
+# Pixel manipulation
+# ^^^^^^^^^^^^^^^^^^
+
+_display(4)
--- a/examples/tutorials/streaming_api_tutorial.py
+++ b/examples/tutorials/streaming_api_tutorial.py
 """
-Media Stream API
-================
+Media Stream API - Pt. 1
+========================

 This tutorial shows how to use torchaudio's I/O stream API to
 fetch and decode audio/video data and apply preprocessings that
@@ -25,8 +25,8 @@ libavfilter provides.
 #

 ######################################################################
-# 1. Overview
-# -----------
+# Overview
+# --------
 #
 # Streaming API leverages the powerful I/O features of ffmpeg.
 #
@@ -57,8 +57,8 @@ libavfilter provides.
 #

 ######################################################################
-# 2. Preparation
-# --------------
+# Preparation
+# -----------
 #

 import torch
@@ -100,8 +100,8 @@ AUDIO_URL = f"{base_url}/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav"
 VIDEO_URL = f"{base_url}/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4.mp4"

 ######################################################################
-# 3. Opening the source
-# ---------------------
+# Opening the source
+# ------------------
 #
 # There are mainly three different sources that streaming API can
 # handle. Whichever source is used, the remaining processes
@@ -113,7 +113,7 @@ VIDEO_URL = f"{base_url}/stream-api/NASAs_Most_Scientifically_Complex_Space_Obse
 #
 # The following section covers how to open common media formats.
 # For the other streams, please refer to the
-# `Advanced I/O streams` section.
+# `Media Stream API - Pt. 2 <./streaming_api2_tutorial.html>`__.
 #
 # .. note::
 #
@@ -211,8 +211,8 @@ VIDEO_URL = f"{base_url}/stream-api/NASAs_Most_Scientifically_Complex_Space_Obse
 #

 ######################################################################
-# Opening a headerless data
-# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# Headerless media
+# ~~~~~~~~~~~~~~~~
 #
 # If attempting to load headerless raw data, you can use ``format`` and
 # ``option`` to specify the format of the data.
@@ -233,8 +233,8 @@ VIDEO_URL = f"{base_url}/stream-api/NASAs_Most_Scientifically_Complex_Space_Obse
 #

 ######################################################################
-# 4. Checking the source streams
-# ------------------------------
+# Checking the source streams
+# ---------------------------
 #
 # Once the media is opened, we can inspect the streams and configure
 # the output streams.
@@ -279,8 +279,8 @@ for i in range(streamer.num_src_streams):
    print(streamer.get_src_stream_info(i))

 ######################################################################
-# 5. Configuring output streams
-# -------------------------------
+# Configuring output streams
+# --------------------------
 #
 # The stream API lets you stream data from an arbitrary combination of
 # the input streams. If your application does not need audio or video,
@@ -421,8 +421,8 @@ for i in range(streamer.num_src_streams):
 #

 ######################################################################
-# 6. Streaming
-# ------------
+# Streaming
+# ---------
 #
 # To stream media data, the streamer alternates the process of
 # fetching and decoding the source data, and passing the resulting
@@ -448,8 +448,8 @@ for i in range(streamer.num_src_streams):
 #

 ######################################################################
-# 7. Example
-# ----------
+# Example
+# -------
 #
 # Let's take an example video to configure the output streams.
 # We will use the following video.
@@ -610,449 +610,3 @@ for i, vid in enumerate(vids2):
            ax.set_ylabel("Stream 2")
 plt.tight_layout()
 plt.show(block=False)
-
-######################################################################
-# [Advanced I/O streams]
-# ----------------------
-#
-
-######################################################################
-# 1. Audio / Video device input
-# -----------------------------
-#
-# .. seealso::
-#
-#    - `Accelerated Video Decoding with NVDEC <../hw_acceleration_tutorial.html>`__.
-#    - `Online ASR with Emformer RNN-T <./online_asr_tutorial.html>`__.
-#    - `Device ASR with Emformer RNN-T <./device_asr.html>`__.
-#
-# Given that the system has proper media devices and libavdevice is
-# configured to use the devices, the streaming API can
-# pull media streams from these devices.
-#
-# To do this, we pass additional parameters ``format`` and ``option``
-# to the constructor. ``format`` specifies the device component and
-# ``option`` dictionary is specific to the specified component.
-#
-# The exact arguments to be passed depend on the system configuration.
-# Please refer to https://ffmpeg.org/ffmpeg-devices.html for the detail.
-#
-# The following example illustrates how one can do this on MacBook Pro.
-#
-# First, we need to check the available devices.
-#
-# .. code::
-#
-#    $ ffmpeg -f avfoundation -list_devices true -i ""
-#    [AVFoundation indev @ 0x143f04e50] AVFoundation video devices:
-#    [AVFoundation indev @ 0x143f04e50] [0] FaceTime HD Camera
-#    [AVFoundation indev @ 0x143f04e50] [1] Capture screen 0
-#    [AVFoundation indev @ 0x143f04e50] AVFoundation audio devices:
-#    [AVFoundation indev @ 0x143f04e50] [0] MacBook Pro Microphone
-#
-# We use `FaceTime HD Camera` as video device (index 0) and
-# `MacBook Pro Microphone` as audio device (index 0).
-#
-# If we do not pass any ``option``, the device uses its default
-# configuration. The decoder might not support the configuration.
-#
-# .. code::
-#
-#    >>> StreamReader(
-#    ...     src="0:0",  # The first 0 means `FaceTime HD Camera`, and
-#    ...                 # the second 0 indicates `MacBook Pro Microphone`.
-#    ...     format="avfoundation",
-#    ... )
-#    [avfoundation @ 0x125d4fe00] Selected framerate (29.970030) is not supported by the device.
-#    [avfoundation @ 0x125d4fe00] Supported modes:
-#    [avfoundation @ 0x125d4fe00]   1280x720@[1.000000 30.000000]fps
-#    [avfoundation @ 0x125d4fe00]   640x480@[1.000000 30.000000]fps
-#    Traceback (most recent call last):
-#      File "<stdin>", line 1, in <module>
-#      ...
-#    RuntimeError: Failed to open the input: 0:0
-#
-# By providing ``option``, we can change the format that the device
-# streams to a format supported by decoder.
-#
-# .. code::
-#
-#    >>> streamer = StreamReader(
-#    ...     src="0:0",
-#    ...     format="avfoundation",
-#    ...     option={"framerate": "30", "pixel_format": "bgr0"},
-#    ... )
-#    >>> for i in range(streamer.num_src_streams):
-#    ...     print(streamer.get_src_stream_info(i))
-#    SourceVideoStream(media_type='video', codec='rawvideo', codec_long_name='raw video', format='bgr0', bit_rate=0, width=640, height=480, frame_rate=30.0)
-#    SourceAudioStream(media_type='audio', codec='pcm_f32le', codec_long_name='PCM 32-bit floating point little-endian', format='flt', bit_rate=3072000, sample_rate=48000.0, num_channels=2)
-#
-
-######################################################################
-# 2. Synthetic source streams
-# ---------------------------
-#
-# As a part of device integration, ffmpeg provides a "virtual device"
-# interface. This interface provides synthetic audio / video data
-# generation using libavfilter.
-#
-# To use this, we set ``format=lavfi`` and provide a filter description
-# to ``src``.
-#
-# The detail of filter description can be found at
-# https://ffmpeg.org/ffmpeg-filters.html
-#
-
-######################################################################
-# Synthetic audio examples
-# ------------------------
-#
-
-######################################################################
-# Sine wave
-# ~~~~~~~~~
-# https://ffmpeg.org/ffmpeg-filters.html#sine
-#
-# .. code::
-#
-#    StreamReader(src="sine=sample_rate=8000:frequency=360", format="lavfi")
-#
-# .. raw:: html
-#
-#    <audio controls>
-#        <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/sine.wav">
-#    </audio>
-#    <img
-#     src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/sine.png"
-#     class="sphx-glr-single-img" style="width:80%">
-#
-
-######################################################################
-# Generate an audio signal specified by an expression
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-#
-# https://ffmpeg.org/ffmpeg-filters.html#aevalsrc
-#
-# .. code::
-#
-#    # 5 Hz binaural beats on a 360 Hz carrier
-#    StreamReader(
-#        src=(
-#            'aevalsrc='
-#            'sample_rate=8000:'
-#            'exprs=0.1*sin(2*PI*(360-5/2)*t)|0.1*sin(2*PI*(360+5/2)*t)'
-#        ),
-#        format='lavfi',
-#     )
-#
-# .. raw:: html
-#
-#    <audio controls>
-#        <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/aevalsrc.wav">
-#    </audio>
-#    <img
-#     src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/aevalsrc.png"
-#     class="sphx-glr-single-img" style="width:80%">
-#
-
-######################################################################
-# Noise
-# ~~~~~
-# https://ffmpeg.org/ffmpeg-filters.html#anoisesrc
-#
-# .. code::
-#
-#    StreamReader(src="anoisesrc=color=pink:sample_rate=8000:amplitude=0.5", format="lavfi")
-#
-# .. raw:: html
-#
-#    <audio controls>
-#        <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/anoisesrc.wav">
-#    </audio>
-#    <img
-#     src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/anoisesrc.png"
-#     class="sphx-glr-single-img" style="width:80%">
-#
-
-######################################################################
-# Synthetic video examples
-# ------------------------
-#
-
-######################################################################
-# Cellular automaton
-# ~~~~~~~~~~~~~~~~~~
-# https://ffmpeg.org/ffmpeg-filters.html#cellauto
-#
-# .. code::
-#
-#    StreamReader(src=f"cellauto", format="lavfi")
-#
-# .. raw:: html
-#
-#    <video controls autoplay loop muted>
-#        <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/cellauto.mp4">
-#    </video>
-#
-
-######################################################################
-# Mandelbrot
-# ~~~~~~~~~~
-# https://ffmpeg.org/ffmpeg-filters.html#cellauto
-#
-# .. code::
-#
-#    StreamReader(src=f"mandelbrot", format="lavfi")
-#
-# .. raw:: html
-#
-#    <video controls autoplay loop muted>
-#        <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/mandelbrot.mp4">
-#    </video>
-#
-
-######################################################################
-# MPlayer Test patterns
-# ~~~~~~~~~~~~~~~~~~~~~
-# https://ffmpeg.org/ffmpeg-filters.html#mptestsrc
-#
-# .. code::
-#
-#    StreamReader(src=f"mptestsrc", format="lavfi")
-#
-# .. raw:: html
-#
-#    <video controls autoplay loop muted width=192 height=192>
-#        <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/mptestsrc.mp4">
-#    </video>
-#
-
-######################################################################
-# John Conway's life game
-# ~~~~~~~~~~~~~~~~~~~~~~~
-# https://ffmpeg.org/ffmpeg-filters.html#life
-#
-# .. code::
-#
-#    StreamReader(src=f"life", format="lavfi")
-#
-# .. raw:: html
-#
-#    <video controls autoplay loop muted>
-#        <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/life.mp4">
-#    </video>
-#
-
-######################################################################
-# Sierpinski carpet/triangle fractal
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-# https://ffmpeg.org/ffmpeg-filters.html#sierpinski
-#
-# .. code::
-#
-#    StreamReader(src=f"sierpinski", format="lavfi")
-#
-# .. raw:: html
-#
-#    <video controls autoplay loop muted>
-#        <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/sierpinski.mp4">
-#    </video>
-#
-
-######################################################################
-# 3. Custom output streams
-# ------------------------
-#
-# When defining an output stream, you can use
-# :py:meth:`~torchaudio.io.StreamReader.add_audio_stream` and
-# :py:meth:`~torchaudio.io.StreamReader.add_video_stream` methods.
-#
-# These methods take ``filter_desc`` argument, which is a string
-# formatted according to ffmpeg's
-# `filter expression <https://ffmpeg.org/ffmpeg-filters.html>`_.
-#
-# The difference between ``add_basic_(audio|video)_stream`` and
-# ``add_(audio|video)_stream`` is that ``add_basic_(audio|video)_stream``
-# constructs the filter expression and passes it to the same underlying
-# implementation. Everything ``add_basic_(audio|video)_stream`` can be
-# achieved with ``add_(audio|video)_stream``.
-#
-# .. note::
-#
-#    - When applying custom filters, the client code must convert
-#      the audio/video stream to one of the formats that torchaudio
-#      can convert to tensor format.
-#      This can be achieved, for example, by applying
-#      ``format=pix_fmts=rgb24`` to video stream and
-#      ``aformat=sample_fmts=fltp`` to audio stream.
-#    - Each output stream has separate filter graph. Therefore, it is
-#      not possible to use different input/output streams for a
-#      filter expression. However, it is possible to split one input
-#      stream into multiple of them, and merge them later.
-#
-
-######################################################################
-# Custom audio streams
-# --------------------
-#
-#
-
-# fmt: off
-descs = [
-    # No filtering
-    "anull",
-    # Apply a highpass filter then a lowpass filter
-    "highpass=f=200,lowpass=f=1000",
-    # Manipulate spectrogram
-    (
-        "afftfilt="
-        "real='hypot(re,im)*sin(0)':"
-        "imag='hypot(re,im)*cos(0)':"
-        "win_size=512:"
-        "overlap=0.75"
-    ),
-    # Manipulate spectrogram
-    (
-        "afftfilt="
-        "real='hypot(re,im)*cos((random(0)*2-1)*2*3.14)':"
-        "imag='hypot(re,im)*sin((random(1)*2-1)*2*3.14)':"
-        "win_size=128:"
-        "overlap=0.8"
-    ),
-]
-# fmt: on
-
-######################################################################
-#
-
-sample_rate = 8000
-
-streamer = StreamReader(AUDIO_URL)
-for desc in descs:
-    streamer.add_audio_stream(
-        frames_per_chunk=40000,
-        filter_desc=f"aresample={sample_rate},{desc},aformat=sample_fmts=fltp",
-    )
-
-chunks = next(streamer.stream())
-
-
-def _display(i):
-    print("filter_desc:", streamer.get_out_stream_info(i).filter_description)
-    _, axs = plt.subplots(2, 1)
-    waveform = chunks[i][:, 0]
-    axs[0].plot(waveform)
-    axs[0].grid(True)
-    axs[0].set_ylim([-1, 1])
-    plt.setp(axs[0].get_xticklabels(), visible=False)
-    axs[1].specgram(waveform, Fs=sample_rate)
-    return IPython.display.Audio(chunks[i].T, rate=sample_rate)
-
-
-######################################################################
-# Original
-# ~~~~~~~~
-#
-
-_display(0)
-
-######################################################################
-# Highpass / lowpass filter
-# ~~~~~~~~~~~~~~~~~~~~~~~~~
-#
-
-_display(1)
-
-######################################################################
-# FFT filter - Robot 🤖
-# ~~~~~~~~~~~~~~~~~~~~~
-#
-
-_display(2)
-
-######################################################################
-# FFT filter - Whisper
-# ~~~~~~~~~~~~~~~~~~~~
-#
-
-_display(3)
-
-######################################################################
-# Custom video streams
-# --------------------
-#
-
-# fmt: off
-descs = [
-    # No effect
-    "null",
-    # Split the input stream and apply horizontal flip to the right half.
-    (
-        "split [main][tmp];"
-        "[tmp] crop=iw/2:ih:0:0, hflip [flip];"
-        "[main][flip] overlay=W/2:0"
-    ),
-    # Edge detection
-    "edgedetect=mode=canny",
-    # Rotate image by randomly and fill the background with brown
-    "rotate=angle=-random(1)*PI:fillcolor=brown",
-    # Manipulate pixel values based on the coordinate
-    "geq=r='X/W*r(X,Y)':g='(1-X/W)*g(X,Y)':b='(H-Y)/H*b(X,Y)'"
-]
-# fmt: on
-
-######################################################################
-#
-
-streamer = StreamReader(VIDEO_URL)
-for desc in descs:
-    streamer.add_video_stream(
-        frames_per_chunk=30,
-        filter_desc=f"fps=10,{desc},format=pix_fmts=rgb24",
-    )
-
-streamer.seek(12)
-
-chunks = next(streamer.stream())
-
-
-def _display(i):
-    print("filter_desc:", streamer.get_out_stream_info(i).filter_description)
-    _, axs = plt.subplots(1, 3, figsize=(8, 1.9))
-    chunk = chunks[i]
-    for j in range(3):
-        axs[j].imshow(chunk[10 * j + 1].permute(1, 2, 0))
-        axs[j].set_axis_off()
-    plt.tight_layout()
-    plt.show(block=False)
-
-
-######################################################################
-# Original
-# ~~~~~~~~
-
-_display(0)
-
-######################################################################
-# Mirror
-# ~~~~~~
-
-_display(1)
-
-######################################################################
-# Edge detection
-# ~~~~~~~~~~~~~~~
-
-_display(2)
-
-######################################################################
-# Random rotation
-# ~~~~~~~~~~~~~~~
-
-_display(3)
-
-######################################################################
-# Pixel manipulation
-# ~~~~~~~~~~~~~~~~~~
-
-_display(4)