Commit ffeba11a authored by mayp777's avatar mayp777
Browse files

UPDATE

parent 29deb085
......@@ -15,6 +15,26 @@ article.pytorch-article img.shield-badge {
margin-top: -18px;
margin-bottom: 9px;
}
/* Apply the code highlight to whole sentences instead of each word */
code.docutils.literal.notranslate {
background-color: #f3f4f7;
border-color: #f3f4f7;
border-radius: 5px;
padding: 1px 2px;
}
code.docutils.literal.notranslate span.pre {
background-color: transparent;
padding: 0;
}
/* Do not override in tables generated by autosummary */
tr.row-odd code.docutils.literal.notranslate span.pre {
background-color: transparent;
border-color: transparent;
}
tr.row-even code.docutils.literal.notranslate {
background-color: transparent;
border-color: transparent;
}
/* Fix for Sphinx gallery 0.11
See https://github.com/sphinx-gallery/sphinx-gallery/issues/990
*/
......@@ -35,3 +55,23 @@ article.pytorch-article div.tutorials-card div.card-body code {
border-bottom: none;
background-color: #AFB8C133;
}
/* C++ doc */
/* Fix the mis-indentation of the `class` header */
article.pytorch-article .cpp.class dt {
padding-left: 0.5em;
}
/* Keywords like const, explicit, and class */
dt.sig.sig-object.cpp > span.k > span.pre {
color: #c95362;
}
dt.sig.sig-object.cpp > span.n:not(.sig-param) > span.pre {
color: #9257c6;
}
/* Fix the color of namespace/class name prefixes in signatures */
dt.sig.sig-object.cpp > span.sig-prename.descclassname > span.n > span.pre {
color: #6c6c6d;
}
dt.sig.sig-object.cpp > span.sig-prename.descname > span.n > span.pre {
color: #6c6c6d;
}
......@@ -5,7 +5,11 @@
.. autoclass:: {{ fullname }}()
{%- if name in ["RNNTBundle.FeatureExtractor", "RNNTBundle.TokenProcessor"] %}
{%- set support_classes = [] %}
{%- if name in ["RNNTBundle.FeatureExtractor", "RNNTBundle.TokenProcessor", "Wav2Vec2FABundle.Tokenizer"] %}
{%- set methods = ["__call__"] %}
{%- elif name == "Wav2Vec2FABundle.Aligner" %}
{%- set attributes = [] %}
{%- set methods = ["__call__"] %}
{%- elif name == "Tacotron2TTSBundle.TextProcessor" %}
{%- set attributes = ["tokens"] %}
......@@ -13,14 +17,25 @@
{%- elif name == "Tacotron2TTSBundle.Vocoder" %}
{%- set attributes=["sample_rate"] %}
{%- set methods = ["__call__"] %}
{%- elif name == "VGGishBundle.VGGish" %}
{%- set attributes = [] %}
{%- set methods = ["forward"] %}
{%- elif name == "VGGishBundle.VGGishInputProcessor" %}
{%- set attributes = [] %}
{%- set methods = ["__call__"] %}
{% endif %}
..
ATTRIBUTES
{%- if attributes %}
Properties
----------
{%- endif %}
{%- for item in attributes %}
{%- if not item.startswith('_') %}
{{ item | underline("-") }}
{{ item | underline("~") }}
.. container:: py attribute
......@@ -29,13 +44,17 @@
{%- endif %}
{%- endfor %}
..
METHODS
{%- if methods %}
Methods
-------
{%- endif %}
{%- for item in methods %}
{%- if item != "__init__" %}
{{item | underline("-") }}
{{item | underline("~") }}
.. container:: py attribute
......@@ -43,3 +62,24 @@
{%- endif %}
{%- endfor %}
{%- if support_classes %}
Support Structures
------------------
{%- endif %}
{%- for item in support_classes %}
{% set components = item.split('.') %}
{{ components[-1] | underline("~") }}
.. container:: py attribute
.. autoclass:: {{[fullname, item] | join('.')}}
:members:
{%- endfor %}
..
autogenerated from source/_templates/autosummary/cuda_ctc_decoder_class.rst
{#
################################################################################
# autosummary template for CUCTCDecoder
# Since the class has multiple methods and support structures,
# we want them to show up in the table of contents.
# The default class template does not do this, so we use custom one here.
################################################################################
#}
{{ name | underline }}
{%- if name != "CUCTCDecoder" %}
.. autofunction:: {{fullname}}
{%- else %}
.. autoclass:: {{ fullname }}()
Methods
=======
{%- for item in members %}
{%- if not item.startswith('_') or item == "__call__" %}
{{ item | underline("-") }}
.. container:: py attribute
.. automethod:: {{[fullname, item] | join('.')}}
{%- endif %}
{%- endfor %}
Support Structures
==================
{%- for item in ["CUCTCHypothesis"] %}
{{ item | underline("-") }}
.. autoclass:: torchaudio.models.decoder.{{item}}
:members:
{%- endfor %}
{%- endif %}
..
autogenerated from source/_templates/autosummary/io.rst
{{ fullname | underline }}
.. autofunction:: {{ fullname }}
{%- if name == "info" %}
Support Structure
-----------------
AudioMetaData
~~~~~~~~~~~~~
.. autoclass:: torchaudio.AudioMetaData
{%- endif %}
......@@ -17,12 +17,12 @@
{%- if attributes %}
Properties
==========
----------
{%- for item in attributes %}
{%- if not item.startswith('_') and item not in inherited_members %}
{{ item | underline("-") }}
{{ item | underline("~") }}
.. container:: py attribute
......@@ -32,13 +32,19 @@ Properties
{%- endfor %}
{%- endif %}
{%- if members %}
Methods
=======
-------
{%- for item in members %}
{%- if not item.startswith('_') and item not in inherited_members and item not in attributes %}
{%- if
not item.startswith('_')
and item not in inherited_members
and item not in attributes
%}
{{ item | underline("-") }}
{{ item | underline("~") }}
.. container:: py attribute
......@@ -46,18 +52,39 @@ Methods
{%- endif %}
{%- endfor %}
{%- endif %}
{%- if name == "StreamReader" %}
{%- if name in ["StreamReader", "StreamWriter"] %}
Support Structures
==================
------------------
{%- for item in ["StreamReaderSourceStream", "StreamReaderSourceAudioStream", "StreamReaderSourceVideoStream", "StreamReaderOutputStream"] %}
{%- if name == "StreamReader" %}
{%- for item in [
"ChunkTensor",
"SourceStream",
"SourceAudioStream",
"SourceVideoStream",
"OutputStream",
"OutputAudioStream",
"OutputVideoStream",
] %}
{{ item | underline("~") }}
.. autoclass:: torchaudio.io._stream_reader.{{item}}()
:members:
{{ item | underline("-") }}
{%- endfor %}
{%- elif name == "StreamWriter" %}
CodecConfig
~~~~~~~~~~~
.. autoclass:: torchaudio.io.{{item}}()
.. autoclass:: torchaudio.io::CodecConfig
:members:
{%- endfor %}
{%- endif %}
{%- endif %}
..
autogenerated from source/_templates/autosummary/model_class.rst
.. currentmodule:: torchaudio.models
..
{%- set methods=["forward"] %}
{%- set helpers={
"torchaudio.models.RNNTBeamSearch": [
"Hypothesis",
],
}
-%}
{%- set factory={
"torchaudio.models.ConvTasNet": [
"conv_tasnet_base",
],
"torchaudio.models.Wav2Vec2Model": [
"wav2vec2_model",
"wav2vec2_base",
"wav2vec2_large",
"wav2vec2_large_lv60k",
"wav2vec2_xlsr_300m",
"wav2vec2_xlsr_1b",
"wav2vec2_xlsr_2b",
"hubert_base",
"hubert_large",
"hubert_xlarge",
"wavlm_model",
"wavlm_base",
"wavlm_large",
],
"torchaudio.models.HuBERTPretrainModel": [
"hubert_pretrain_model",
"hubert_pretrain_base",
"hubert_pretrain_large",
"hubert_pretrain_xlarge",
],
"torchaudio.models.RNNT": [
"emformer_rnnt_model",
"emformer_rnnt_base",
],
"torchaudio.models.HDemucs": [
"hdemucs_low",
"hdemucs_medium",
"hdemucs_high",
],
"torchaudio.models.SquimObjective": [
"squim_objective_model",
"squim_objective_base",
],
"torchaudio.models.SquimSubjective": [
"squim_subjective_model",
"squim_subjective_base",
],
}
-%}
{%- set utils={
"torchaudio.models.Wav2Vec2Model": [
"~torchaudio.models.wav2vec2.utils.import_fairseq_model",
"~torchaudio.models.wav2vec2.utils.import_huggingface_model",
]
}
-%}
{%- if name in ["Wav2Vec2Model"] %}
{{ methods.extend(["extract_features"]) }}
{%- elif name in ["Emformer", "RNNTBeamSearch", "WaveRNN", "Tacotron2", ] %}
......@@ -10,10 +72,17 @@
{{ methods.extend(["transcribe_streaming", "transcribe", "predict", "join"]) }}
{%- endif %}
.. TITLE
{{ name | underline }}
.. CLASS DEFINITIONS
.. autoclass:: {{ fullname }}
Methods
=======
{% for item in methods %}
{{item | underline("-") }}
......@@ -24,17 +93,58 @@
{%- endfor %}
{%- if name == "RNNTBeamSearch" %}
.. HELPER STRUCTURES
{%- if helpers[fullname] %}
Support Structures
==================
Hypothesis
----------
{%- for item in helpers[fullname] %}
{{item | underline("-") }}
.. container:: py attribute
.. autodata:: torchaudio.models.Hypothesis
.. autodata:: {{["torchaudio.models", item] | join('.')}}
:no-value:
{%- endfor %}
{%- endif %}
.. FACTORY FUNCTIONS
{%- if factory[fullname] %}
Factory Functions
=================
.. autosummary::
:toctree: ../generated
:nosignatures:
{% for item in factory[fullname] %}
{{["~torchaudio.models", item] | join('.')}}
{%- endfor %}
{%- endif %}
.. UTILITY FUNCTIONS
{%- if utils[fullname] %}
Utility Functions
=================
.. currentmodule:: torchaudio.models
.. autosummary::
:toctree: ../generated
:nosignatures:
{% for item in utils[fullname] %}
{{ item }}
{%- endfor %}
{%- endif %}
{# Support for Sphinx 1.3+ page_source_suffix, but don't break old builds. #}
{% if page_source_suffix %}
{% set suffix = page_source_suffix %}
{% else %}
{% set suffix = source_suffix %}
{% endif %}
{% if meta is defined and meta is not none %}
{% set check_meta = True %}
{% else %}
{% set check_meta = False %}
{% endif %}
{% if check_meta and 'github_url' in meta %}
{% set display_github = True %}
{% endif %}
{% if check_meta and 'bitbucket_url' in meta %}
{% set display_bitbucket = True %}
{% endif %}
{% if check_meta and 'gitlab_url' in meta %}
{% set display_gitlab = True %}
{% endif %}
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="pytorch-breadcrumbs">
{% block breadcrumbs %}
<li>
<a href="{{ pathto(master_doc) }}">
{% if theme_pytorch_project == 'tutorials' %}
Tutorials
{% else %}
Docs
{% endif %}
</a> &gt;
</li>
{% for doc in parents %}
<li><a href="{{ doc.link|e }}">{{ doc.title }}</a> &gt;</li>
{% endfor %}
<li>{{ title }} &gt;</li>
{% if 'dev' in version %}
<li>Nightly (unstable)</li>
{% elif version_stable in version %}
<li>Current (stable)</li>
{% else %}
<li>Old version (stable)</li>
{% endif %}
{% endblock %}
{% block breadcrumbs_aside %}
<li class="pytorch-breadcrumbs-aside">
{% if hasdoc(pagename) %}
{% if display_github %}
{% if check_meta and 'github_url' in meta %}
<!-- User defined GitHub URL -->
<a href="{{ meta['github_url'] }}" class="fa fa-github"> {{ _('Edit on GitHub') }}</a>
{% else %}
<a href="https://{{ github_host|default("github.com") }}/{{ github_user }}/{{ github_repo }}/{{ theme_vcs_pageview_mode|default("blob") }}/{{ github_version }}{{ conf_py_path }}{{ pagename }}{{ suffix }}" class="fa fa-github"> {{ _('Edit on GitHub') }}</a>
{% endif %}
{% elif display_bitbucket %}
{% if check_meta and 'bitbucket_url' in meta %}
<!-- User defined Bitbucket URL -->
<a href="{{ meta['bitbucket_url'] }}" class="fa fa-bitbucket"> {{ _('Edit on Bitbucket') }}</a>
{% else %}
<a href="https://bitbucket.org/{{ bitbucket_user }}/{{ bitbucket_repo }}/src/{{ bitbucket_version}}{{ conf_py_path }}{{ pagename }}{{ suffix }}?mode={{ theme_vcs_pageview_mode|default("view") }}" class="fa fa-bitbucket"> {{ _('Edit on Bitbucket') }}</a>
{% endif %}
{% elif display_gitlab %}
{% if check_meta and 'gitlab_url' in meta %}
<!-- User defined GitLab URL -->
<a href="{{ meta['gitlab_url'] }}" class="fa fa-gitlab"> {{ _('Edit on GitLab') }}</a>
{% else %}
<a href="https://{{ gitlab_host|default("gitlab.com") }}/{{ gitlab_user }}/{{ gitlab_repo }}/{{ theme_vcs_pageview_mode|default("blob") }}/{{ gitlab_version }}{{ conf_py_path }}{{ pagename }}{{ suffix }}" class="fa fa-gitlab"> {{ _('Edit on GitLab') }}</a>
{% endif %}
{% elif show_source and source_url_prefix %}
<a href="{{ source_url_prefix }}{{ pagename }}{{ suffix }}"><img src="{{ pathto('_static/images/view-page-source-icon.svg', 1) }}"></a>
{% elif show_source and has_source and sourcename %}
<a href="{{ pathto('_sources/' + sourcename, true)|e }}" rel="nofollow"><img src="{{ pathto('_static/images/view-page-source-icon.svg', 1) }}"></a>
{% endif %}
{% endif %}
</li>
{% endblock %}
</ul>
{% if (theme_prev_next_buttons_location == 'top' or theme_prev_next_buttons_location == 'both') and (next or prev) %}
<div class="rst-breadcrumbs-buttons" role="navigation" aria-label="breadcrumb navigation">
{% if next %}
<a href="{{ next.link|e }}" class="btn btn-neutral float-right" title="{{ next.title|striptags|e }}" accesskey="n">Next <span class="fa fa-arrow-circle-right"></span></a>
{% endif %}
{% if prev %}
<a href="{{ prev.link|e }}" class="btn btn-neutral" title="{{ prev.title|striptags|e }}" accesskey="p"><span class="fa fa-arrow-circle-left"></span> Previous</a>
{% endif %}
</div>
{% endif %}
</div>
......@@ -2,7 +2,7 @@
{% block sidebartitle %}
<div class="version">
<a href="{{ pathto('../versions.html', 1) }}">{{ version }} &#x25BC</a>
<a href="{{ pathto('../versions.html', 1) }}"><span style="font-size:110%">{{ version }} &#x25BC</span></a>
</div>
{% include "searchbox.html" %}
{% endblock %}
......@@ -83,7 +83,6 @@
$(".main-menu a:contains('Github')").each(overwrite);
});
{% if 'tutorial' in pagename %}
{#
# Override the right side menu bar behavior so that subsections
# are shown by default in tutorial pages.
......@@ -107,6 +106,30 @@
}
};
});
{% endif %}
</script>
{% if 'libtorchaudio' in pagename %}
{#
# change the signature layout to one parameter per line if #params >= 2
#}
<script type="text/javascript">
$(window).ready(function() {
$("dt.sig.sig-object.cpp").each(function(i) {
let newline = "<br>\x20\x20\x20\x20";  // "\x20" is a space ("\20" was an invalid octal escape)
let params = $(this).children(".sig-param");
if (params.length >= 2) {
$(this).html($(this).html().replace(/, /g, "," + newline));
$(this).children(".sig-paren").each(function(i) {
if (i == 0) {
$(this).html($(this).html() + newline + "\x20");
} else if (i == 1) {
$(this).html("<br>" + $(this).html());
}
});
}
});
});
</script>
{% endif %}
{% endblock %}
.. _enabling_hw_decoder:
Enabling GPU video decoder/encoder
==================================
TorchAudio can make use of hardware-based video decoding and encoding supported by underlying FFmpeg libraries that are linked at runtime.
With NVIDIA's GPU decoder and encoder, it is also possible to pass CUDA tensors around directly, that is, to decode video into a CUDA tensor or encode video from a CUDA tensor, without moving data to/from the CPU.
This improves video throughput significantly. However, please note that not all video formats are supported by hardware acceleration.
This page goes through how to build FFmpeg with hardware acceleration. For details on the performance of the GPU decoder and encoder, please see the :ref:`NVDEC tutorial <nvdec_tutorial>` and :ref:`NVENC tutorial <nvenc_tutorial>`.
Overview
--------
Using them in TorchAudio requires additional FFmpeg configuration.
In the following, we look into how to enable GPU video decoding with `NVIDIA's Video codec SDK <https://developer.nvidia.com/nvidia-video-codec-sdk>`_.
To use NVENC/NVDEC with TorchAudio, the following items are required.
1. NVIDIA GPU with hardware video decoder/encoder.
2. FFmpeg libraries compiled with NVDEC/NVENC support. †
3. PyTorch / TorchAudio with CUDA support.
TorchAudio’s official binary distributions are compiled to work with FFmpeg libraries, and they contain the logic to use hardware decoding/encoding.
In the following, we build FFmpeg 4 libraries with NVDEC/NVENC support. You can also use FFmpeg 5 or 6.
The following procedure was tested on Ubuntu.
† For details on NVDEC/NVENC and FFmpeg, please refer to the following articles.
- https://docs.nvidia.com/video-technologies/video-codec-sdk/11.1/nvdec-video-decoder-api-prog-guide/
- https://docs.nvidia.com/video-technologies/video-codec-sdk/11.1/ffmpeg-with-nvidia-gpu/index.html#compiling-ffmpeg
- https://developer.nvidia.com/blog/nvidia-ffmpeg-transcoding-guide/
Check the GPU and CUDA version
------------------------------
First, check the available GPU. Here, we have a Tesla T4 with CUDA Toolkit 11.2 installed.
.. code-block::
$ nvidia-smi
Fri Oct 7 13:01:26 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03 Driver Version: 460.32.03 CUDA Version: 11.2 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |
| N/A 56C P8 10W / 70W | 0MiB / 15109MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
Checking the compute capability
-------------------------------
Later, we need the compute capability supported by this GPU. The following page lists GPUs and their corresponding compute capabilities. The compute capability of the T4 is ``7.5``.
https://developer.nvidia.com/cuda-gpus
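If a recent NVIDIA driver is installed, ``nvidia-smi`` can also report the compute capability directly. A minimal sketch, assuming the driver supports the ``compute_cap`` query field (older drivers do not; the output shown is illustrative):
.. code-block:: bash
# Query the compute capability directly from the driver.
$ nvidia-smi --query-gpu=name,compute_cap --format=csv
name, compute_cap
Tesla T4, 7.5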
Install NVIDIA Video Codec Headers
----------------------------------
To build FFmpeg with NVDEC/NVENC, we first need to install the headers that FFmpeg uses to interact with the Video Codec SDK.
Since we have CUDA 11 working in the system, we use one of the ``n11`` tags.
.. code-block:: bash
git clone https://git.videolan.org/git/ffmpeg/nv-codec-headers.git
cd nv-codec-headers
git checkout n11.0.10.1
sudo make install
The installation location can be changed with ``make PREFIX=<DESIRED_DIRECTORY> install``.
.. code-block:: text
Cloning into 'nv-codec-headers'...
remote: Enumerating objects: 819, done.
remote: Counting objects: 100% (819/819), done.
remote: Compressing objects: 100% (697/697), done.
remote: Total 819 (delta 439), reused 0 (delta 0)
Receiving objects: 100% (819/819), 156.42 KiB | 410.00 KiB/s, done.
Resolving deltas: 100% (439/439), done.
Note: checking out 'n11.0.10.1'.
You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by performing another checkout.
If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -b with the checkout command again. Example:
git checkout -b <new-branch-name>
HEAD is now at 315ad74 add cuMemcpy
sed 's#@@PREFIX@@#/usr/local#' ffnvcodec.pc.in > ffnvcodec.pc
install -m 0755 -d '/usr/local/include/ffnvcodec'
install -m 0644 include/ffnvcodec/*.h '/usr/local/include/ffnvcodec'
install -m 0755 -d '/usr/local/lib/pkgconfig'
install -m 0644 ffnvcodec.pc '/usr/local/lib/pkgconfig'
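If you install the headers under a non-default prefix as mentioned above, the FFmpeg build later finds them through ``pkg-config``, so the custom prefix must be added to its search path. A sketch, assuming a hypothetical prefix ``/opt/ffnvcodec``:
.. code-block:: bash
# Install the headers under a custom prefix ...
sudo make PREFIX=/opt/ffnvcodec install
# ... and let pkg-config find the resulting ffnvcodec.pc
export PKG_CONFIG_PATH=/opt/ffnvcodec/lib/pkgconfig:${PKG_CONFIG_PATH}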
Install FFmpeg dependencies
---------------------------
Next, we install tools and libraries required during the FFmpeg build.
The minimum requirement is `Yasm <https://yasm.tortall.net/>`_.
Here we additionally install the H264 video codec and HTTPS protocol support,
which we use later to verify the installation.
.. code-block:: bash
sudo apt -qq update
sudo apt -qq install -y yasm libx264-dev libgnutls28-dev
.. code-block:: text
... Omitted for brevity ...
STRIP install-libavutil-shared
Setting up libx264-dev:amd64 (2:0.152.2854+gite9a5903-2) ...
Setting up yasm (1.3.0-2build1) ...
Setting up libunbound2:amd64 (1.6.7-1ubuntu2.5) ...
Setting up libp11-kit-dev:amd64 (0.23.9-2ubuntu0.1) ...
Setting up libtasn1-6-dev:amd64 (4.13-2) ...
Setting up libtasn1-doc (4.13-2) ...
Setting up libgnutlsxx28:amd64 (3.5.18-1ubuntu1.6) ...
Setting up libgnutls-dane0:amd64 (3.5.18-1ubuntu1.6) ...
Setting up libgnutls-openssl27:amd64 (3.5.18-1ubuntu1.6) ...
Setting up libgmpxx4ldbl:amd64 (2:6.1.2+dfsg-2) ...
Setting up libidn2-dev:amd64 (2.0.4-1.1ubuntu0.2) ...
Setting up libidn2-0-dev (2.0.4-1.1ubuntu0.2) ...
Setting up libgmp-dev:amd64 (2:6.1.2+dfsg-2) ...
Setting up nettle-dev:amd64 (3.4.1-0ubuntu0.18.04.1) ...
Setting up libgnutls28-dev:amd64 (3.5.18-1ubuntu1.6) ...
Processing triggers for man-db (2.8.3-2ubuntu0.1) ...
Processing triggers for libc-bin (2.27-3ubuntu1.6) ...
Build FFmpeg with NVDEC/NVENC support
-------------------------------------
Next, we download the FFmpeg 4 source code. We use 4.4.2 here.
.. code-block:: bash
wget -q https://github.com/FFmpeg/FFmpeg/archive/refs/tags/n4.4.2.tar.gz
tar -xf n4.4.2.tar.gz
cd FFmpeg-n4.4.2
Next, we configure the FFmpeg build. Note the following:
1. We provide flags like ``-I/usr/local/cuda/include`` and ``-L/usr/local/cuda/lib64`` to let the build process know where the CUDA libraries are located.
2. We provide flags like ``--enable-nvdec`` and ``--enable-nvenc`` to enable NVDEC/NVENC.
3. We also provide NVCC flags with compute capability ``75``, which corresponds to the T4's ``7.5``. †
4. We install the library in ``/usr/lib/``.
.. note::
† The configuration script verifies NVCC by compiling sample code. By default it uses an old compute capability such as ``30``, which is no longer supported by CUDA 11, so it is required to set a correct compute capability.
.. code-block:: bash
prefix=/usr/
ccap=75
./configure \
--prefix="${prefix}" \
--extra-cflags='-I/usr/local/cuda/include' \
--extra-ldflags='-L/usr/local/cuda/lib64' \
--nvccflags="-gencode arch=compute_${ccap},code=sm_${ccap} -O2" \
--disable-doc \
--enable-decoder=aac \
--enable-decoder=h264 \
--enable-decoder=h264_cuvid \
--enable-decoder=rawvideo \
--enable-indev=lavfi \
--enable-encoder=libx264 \
--enable-encoder=h264_nvenc \
--enable-demuxer=mov \
--enable-muxer=mp4 \
--enable-filter=scale \
--enable-filter=testsrc2 \
--enable-protocol=file \
--enable-protocol=https \
--enable-gnutls \
--enable-shared \
--enable-gpl \
--enable-nonfree \
--enable-cuda-nvcc \
--enable-libx264 \
--enable-nvenc \
--enable-cuvid \
--enable-nvdec
.. code-block:: text
install prefix /usr/
source path .
C compiler gcc
C library glibc
ARCH x86 (generic)
big-endian no
runtime cpu detection yes
standalone assembly yes
x86 assembler yasm
MMX enabled yes
MMXEXT enabled yes
3DNow! enabled yes
3DNow! extended enabled yes
SSE enabled yes
SSSE3 enabled yes
AESNI enabled yes
AVX enabled yes
AVX2 enabled yes
AVX-512 enabled yes
XOP enabled yes
FMA3 enabled yes
FMA4 enabled yes
i686 features enabled yes
CMOV is fast yes
EBX available yes
EBP available yes
debug symbols yes
strip symbols yes
optimize for size no
optimizations yes
static no
shared yes
postprocessing support no
network support yes
threading support pthreads
safe bitstream reader yes
texi2html enabled no
perl enabled yes
pod2man enabled yes
makeinfo enabled no
makeinfo supports HTML no
External libraries:
alsa libx264 lzma
bzlib libxcb zlib
gnutls libxcb_shape
iconv libxcb_xfixes
External libraries providing hardware acceleration:
cuda cuvid nvenc
cuda_llvm ffnvcodec v4l2_m2m
cuda_nvcc nvdec
Libraries:
avcodec avformat swscale
avdevice avutil
avfilter swresample
Programs:
ffmpeg ffprobe
Enabled decoders:
aac hevc rawvideo
av1 mjpeg vc1
h263 mpeg1video vp8
h264 mpeg2video vp9
h264_cuvid mpeg4
Enabled encoders:
h264_nvenc libx264
Enabled hwaccels:
av1_nvdec mpeg1_nvdec vp8_nvdec
h264_nvdec mpeg2_nvdec vp9_nvdec
hevc_nvdec mpeg4_nvdec wmv3_nvdec
mjpeg_nvdec vc1_nvdec
Enabled parsers:
h263 mpeg4video vp9
Enabled demuxers:
mov
Enabled muxers:
mov mp4
Enabled protocols:
file tcp
https tls
Enabled filters:
aformat hflip transpose
anull null trim
atrim scale vflip
format testsrc2
Enabled bsfs:
aac_adtstoasc null vp9_superframe_split
h264_mp4toannexb vp9_superframe
Enabled indevs:
lavfi
Enabled outdevs:
License: nonfree and unredistributable
Now we build and install:
.. code-block:: bash
make clean
make -j
sudo make install
.. code-block:: text
... Omitted for brevity ...
INSTALL libavdevice/libavdevice.so
INSTALL libavfilter/libavfilter.so
INSTALL libavformat/libavformat.so
INSTALL libavcodec/libavcodec.so
INSTALL libswresample/libswresample.so
INSTALL libswscale/libswscale.so
INSTALL libavutil/libavutil.so
INSTALL install-progs-yes
INSTALL ffmpeg
INSTALL ffprobe
Checking the installation
-------------------------
To verify that the FFmpeg we built has CUDA support, we can check the lists of available decoders and encoders.
.. code-block:: bash
ffprobe -hide_banner -decoders | grep h264
.. code-block:: text
VFS..D h264 H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10
V..... h264_cuvid Nvidia CUVID H264 decoder (codec h264)
.. code-block:: bash
ffmpeg -hide_banner -encoders | grep 264
.. code-block:: text
V..... libx264 libx264 H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (codec h264)
V....D h264_nvenc NVIDIA NVENC H.264 encoder (codec h264)
The following command fetches a video from a remote server, decodes it with NVDEC (cuvid), and re-encodes it with NVENC. If this command does not work, there is an issue with the FFmpeg installation, and TorchAudio would not be able to use it either.
.. code-block:: bash
$ src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4"
$ ffmpeg -hide_banner -y -vsync 0 \
-hwaccel cuvid \
-hwaccel_output_format cuda \
-c:v h264_cuvid \
-resize 360x240 \
-i "${src}" \
-c:a copy \
-c:v h264_nvenc \
-b:v 5M test.mp4
Note that there is ``Stream #0:0 -> #0:0 (h264 (h264_cuvid) -> h264 (h264_nvenc))``, which means that the video is decoded with the ``h264_cuvid`` decoder and encoded with the ``h264_nvenc`` encoder.
.. code-block::
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4':
Metadata:
major_brand : mp42
minor_version : 512
compatible_brands: mp42iso2avc1mp41
encoder : Lavf58.76.100
Duration: 00:03:26.04, start: 0.000000, bitrate: 1294 kb/s
Stream #0:0(eng): Video: h264 (High) (avc1 / 0x31637661), yuv420p(tv, bt709), 960x540 [SAR 1:1 DAR 16:9], 1156 kb/s, 29.97 fps, 29.97 tbr, 30k tbn, 59.94 tbc (default)
Metadata:
handler_name : ?Mainconcept Video Media Handler
vendor_id : [0][0][0][0]
Stream #0:1(eng): Audio: aac (LC) (mp4a / 0x6134706D), 48000 Hz, stereo, fltp, 128 kb/s (default)
Metadata:
handler_name : #Mainconcept MP4 Sound Media Handler
vendor_id : [0][0][0][0]
Stream mapping:
Stream #0:0 -> #0:0 (h264 (h264_cuvid) -> h264 (h264_nvenc))
Stream #0:1 -> #0:1 (copy)
Press [q] to stop, [?] for help
Output #0, mp4, to 'test.mp4':
Metadata:
major_brand : mp42
minor_version : 512
compatible_brands: mp42iso2avc1mp41
encoder : Lavf58.76.100
Stream #0:0(eng): Video: h264 (Main) (avc1 / 0x31637661), cuda(tv, bt709, progressive), 360x240 [SAR 1:1 DAR 3:2], q=2-31, 5000 kb/s, 29.97 fps, 30k tbn (default)
Metadata:
handler_name : ?Mainconcept Video Media Handler
vendor_id : [0][0][0][0]
encoder : Lavc58.134.100 h264_nvenc
Side data:
cpb: bitrate max/min/avg: 0/0/5000000 buffer size: 10000000 vbv_delay: N/A
Stream #0:1(eng): Audio: aac (LC) (mp4a / 0x6134706D), 48000 Hz, stereo, fltp, 128 kb/s (default)
Metadata:
handler_name : #Mainconcept MP4 Sound Media Handler
vendor_id : [0][0][0][0]
frame= 6175 fps=1712 q=11.0 Lsize= 37935kB time=00:03:26.01 bitrate=1508.5kbits/s speed=57.1x
video:34502kB audio:3234kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.526932%
Using the GPU decoder/encoder from TorchAudio
---------------------------------------------
Checking the installation
~~~~~~~~~~~~~~~~~~~~~~~~~
Once FFmpeg is working properly with hardware acceleration, we need to check whether TorchAudio can pick it up correctly.
There are utility functions to query the capability of FFmpeg in :py:mod:`torchaudio.utils.ffmpeg_utils`.
You can first use :py:func:`~torchaudio.utils.ffmpeg_utils.get_video_decoders` and :py:func:`~torchaudio.utils.ffmpeg_utils.get_video_encoders` to check if GPU decoders and encoders (such as ``h264_cuvid`` and ``h264_nvenc``) are listed.
It is often the case that there are multiple FFmpeg installations on the system, and TorchAudio loads a different one than expected. In such cases, checking the installation with the ``ffmpeg`` command does not help. You can use functions like :py:func:`~torchaudio.utils.ffmpeg_utils.get_build_config` and :py:func:`~torchaudio.utils.ffmpeg_utils.get_versions` to get information about the FFmpeg libraries TorchAudio loaded.
.. code-block:: python
from torchaudio.utils import ffmpeg_utils
print("Library versions:")
print(ffmpeg_utils.get_versions())
print("\nBuild config:")
print(ffmpeg_utils.get_build_config())
print("\nDecoders:")
print([k for k in ffmpeg_utils.get_video_decoders().keys() if "cuvid" in k])
print("\nEncoders:")
print([k for k in ffmpeg_utils.get_video_encoders().keys() if "nvenc" in k])
.. code-block:: text
Library versions:
{'libavutil': (56, 31, 100), 'libavcodec': (58, 54, 100), 'libavformat': (58, 29, 100), 'libavfilter': (7, 57, 100), 'libavdevice': (58, 8, 100)}
Build config:
--prefix=/usr --extra-version=0ubuntu0.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-avresample --disable-filter=resample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librsvg --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-nvenc --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared
Decoders:
['h264_cuvid', 'hevc_cuvid', 'mjpeg_cuvid', 'mpeg1_cuvid', 'mpeg2_cuvid', 'mpeg4_cuvid', 'vc1_cuvid', 'vp8_cuvid', 'vp9_cuvid']
Encoders:
['h264_nvenc', 'nvenc', 'nvenc_h264', 'nvenc_hevc', 'hevc_nvenc']
Using the hardware decoder and encoder
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Once the installation and runtime linking work fine, you can test GPU decoding with a short script like the one below.
For details on the performance of the GPU decoder and encoder, please see the :ref:`NVDEC tutorial <nvdec_tutorial>` and :ref:`NVENC tutorial <nvenc_tutorial>`.
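The following is a minimal sketch of such a test (the asset URL is the one used earlier on this page). It decodes one chunk of video with ``h264_cuvid`` directly into CUDA memory via :py:class:`torchaudio.io.StreamReader`; if everything is set up correctly, the printed device should be ``cuda:0``.
.. code-block:: bash
python - <<'EOF'
from torchaudio.io import StreamReader
# Same test asset as used with the ffmpeg command earlier on this page.
src = "https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4"
reader = StreamReader(src)
# Decode with NVDEC and keep the decoded frames on the GPU as CUDA tensors.
reader.add_video_stream(frames_per_chunk=10, decoder="h264_cuvid", hw_accel="cuda:0")
(chunk,) = next(reader.stream())
print(chunk.shape, chunk.dtype, chunk.device)
EOF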
Building on Jetson
==================
1. Install JetPack
------------------
JetPack includes the collection of CUDA-related libraries required to run PyTorch with CUDA.
Please refer to https://developer.nvidia.com/embedded/learn/get-started-jetson-agx-orin-devkit for up-to-date instructions.
.. code-block::
sudo bash -c 'echo "deb https://repo.download.nvidia.com/jetson/common r34.1 main" >> /etc/apt/sources.list.d/nvidia-l4t-apt-source.list'
sudo bash -c 'echo "deb https://repo.download.nvidia.com/jetson/t234 r34.1 main" >> /etc/apt/sources.list.d/nvidia-l4t-apt-source.list'
sudo apt update
sudo apt dist-upgrade
# REBOOT
sudo apt install nvidia-jetpack
Checking the versions
~~~~~~~~~~~~~~~~~~~~~
To check the installed versions, you can use the following commands:
.. code-block::
# JetPack
$ apt list --installed | grep nvidia-jetpack
nvidia-jetpack-dev/stable,now 5.0.1-b118 arm64 [installed,automatic]
nvidia-jetpack-runtime/stable,now 5.0.1-b118 arm64 [installed,automatic]
nvidia-jetpack/stable,now 5.0.1-b118 arm64 [installed]
# CUDA
$ apt list --installed | grep cuda-toolkit
cuda-toolkit-11-4-config-common/stable,now 11.4.243-1 all [installed,automatic]
cuda-toolkit-11-4/stable,now 11.4.14-1 arm64 [installed,automatic]
cuda-toolkit-11-config-common/stable,now 11.4.243-1 all [installed,automatic]
cuda-toolkit-config-common/stable,now 11.4.243-1 all [installed,automatic]
# cuDNN
$ apt list --installed | grep cudnn
libcudnn8-dev/stable,now 8.3.2.49-1+cuda11.4 arm64 [installed,automatic]
libcudnn8-samples/stable,now 8.3.2.49-1+cuda11.4 arm64 [installed,automatic]
libcudnn8/stable,now 8.3.2.49-1+cuda11.4 arm64 [installed,automatic]
nvidia-cudnn8-dev/stable,now 5.0.1-b118 arm64 [installed,automatic]
nvidia-cudnn8-runtime/stable,now 5.0.1-b118 arm64 [installed,automatic]
.. image:: https://download.pytorch.org/torchaudio/doc-assets/jetson-package-versions.png
:width: 360px
2. [Optional] Install jtop
--------------------------
Since Tegra GPUs are not supported by the ``nvidia-smi`` command, it is recommended to install ``jtop``.
Only the super-user can install ``jtop``, so make sure to add ``-U`` so that running ``jtop`` won't require super-user privileges.
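A sketch of the installation, assuming ``jtop`` is distributed via the ``jetson-stats`` package on PyPI:
.. code-block:: bash
# Install as super-user; per the note above, -U lets regular users run jtop.
sudo pip3 install -U jetson-stats
jtop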
3. Install ``pip`` in user env
------------------------------
By default, the ``pip`` / ``pip3`` commands use the ones from the system directory ``/usr/bin/``, whose ``site-packages`` directory is protected and cannot be modified without ``sudo``.
One way to work around this is to install ``pip`` in the user directory.
https://forums.developer.nvidia.com/t/python-3-module-install-folder/181321
.. code-block::
wget https://bootstrap.pypa.io/get-pip.py
python get-pip.py --user
After this, verify that the ``pip`` command points to the one in the user directory.
.. code-block::
$ which pip
/home/USER/.local/bin/pip
4. Install PyTorch
------------------
As of PyTorch 1.13 and torchaudio 0.13, there are no official pre-built binaries for Linux ARM64. NVIDIA provides custom pre-built binaries for PyTorch, which work with specific JetPack versions.
Please refer to https://docs.nvidia.com/deeplearning/frameworks/install-pytorch-jetson-platform/index.html for up-to-date instructions on how to install PyTorch.
.. code-block::
$ package=torch-1.13.0a0+340c4120.nv22.06-cp38-cp38-linux_aarch64.whl
$ wget "https://developer.download.nvidia.com/compute/redist/jp/v50/pytorch/${package}"
$ pip install --no-cache "${package}"
Verify the installation by checking the version and CUDA device accessibility.
.. code-block::
$ python -c '
import torch
print(torch.__version__)
print(torch.cuda.is_available())
print(torch.empty((1, 2), device=torch.device("cuda")))
'
1.13.0a0+410ce96a.nv22.12
True
tensor([[0., 0.]], device='cuda:0')
.. image:: https://download.pytorch.org/torchaudio/doc-assets/jetson-torch.png
:width: 360px
5. Build TorchAudio
-------------------
1. Install build tools
~~~~~~~~~~~~~~~~~~~~~~
.. code-block::
pip install cmake ninja
2. Install dependencies
~~~~~~~~~~~~~~~~~~~~~~~
.. code-block::
sudo apt install ffmpeg libavformat-dev libavcodec-dev libavutil-dev libavdevice-dev libavfilter-dev
3. Build TorchAudio
~~~~~~~~~~~~~~~~~~~
.. code-block::
git clone https://github.com/pytorch/audio
cd audio
USE_CUDA=1 pip install -v -e . --no-use-pep517
4. Check the installation
~~~~~~~~~~~~~~~~~~~~~~~~~
.. code-block::
import torchaudio
print(torchaudio.__version__)
torchaudio.utils.ffmpeg_utils.get_build_config()
.. code-block::
2.0.0a0+2ead941
--prefix=/usr --extra-version=0ubuntu0.1 --toolchain=hardened --libdir=/usr/lib/aarch64-linux-gnu --incdir=/usr/include/aarch64-linux-gnu --arch=arm64 --enable-gpl --disable-stripping --enable-avresample --disable-filter=resample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librsvg --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared
.. image:: https://download.pytorch.org/torchaudio/doc-assets/jetson-verify-build.png
:width: 360px
Building on Linux and macOS
===========================
1. Install Conda and activate conda environment
-----------------------------------------------
Please follow the instructions at https://docs.conda.io/en/latest/miniconda.html
2. Install PyTorch
------------------
Please select the version of PyTorch you want to install from https://pytorch.org/get-started/locally/
Here, we install the nightly build.
.. code-block::
conda install pytorch -c pytorch-nightly
3. Install build tools
----------------------
.. code-block::
conda install cmake ninja
4. Clone the torchaudio repository
----------------------------------
.. code-block::
git clone https://github.com/pytorch/audio
cd audio
5. Build
--------
.. code-block::
python setup.py develop
.. note::
Due to the complexity of the build process, TorchAudio only supports in-place builds.
To use ``pip``, please use the ``--no-use-pep517`` option.
``pip install -v -e . --no-use-pep517``
[Optional] Build TorchAudio with a custom built FFmpeg
------------------------------------------------------
By default, torchaudio tries to build the FFmpeg extension with support for multiple FFmpeg versions. This process uses pre-built FFmpeg libraries compiled for specific CPU architectures like ``x86_64`` and ``aarch64`` (``arm64``).
If your CPU is not one of those, the build process can fail. To work around this, you can disable the FFmpeg integration (by setting the environment variable ``USE_FFMPEG=0``) or switch to the single-version FFmpeg extension.
To build the single-version FFmpeg extension, FFmpeg binaries must be provided by the user and available in the build environment. To do so, install FFmpeg and set the ``FFMPEG_ROOT`` environment variable to the location of FFmpeg.
.. code-block::
conda install -c conda-forge ffmpeg
FFMPEG_ROOT=${CONDA_PREFIX} python setup.py develop
Building from source
====================
TorchAudio integrates PyTorch for numerical computation and third-party libraries for multimedia I/O. Building from source requires the following tools.
- `PyTorch <https://pytorch.org>`_
- `CMake <https://cmake.org/>`_
- `Ninja <https://ninja-build.org/>`_
- C++ compiler with C++17 support
- `GCC <https://gcc.gnu.org/>`_ (Linux)
- `Clang <https://clang.llvm.org/>`_ (macOS)
- `MSVC <https://visualstudio.microsoft.com>`_ 2019 or newer (Windows)
- `CUDA toolkit <https://developer.nvidia.com/cuda-toolkit>`_ and `cuDNN <https://developer.nvidia.com/cudnn>`_ (if building the CUDA extension)
Most of the tools are available in `Conda <https://conda.io/>`_, so we recommend using conda.
.. toctree::
:maxdepth: 1
build.linux
build.windows
build.jetson
Customizing the build
---------------------
TorchAudio's integration with third-party libraries can be enabled/disabled via
environment variables.
They can be enabled by passing ``1`` and disabled by passing ``0`` (see the example further below).
- ``BUILD_SOX``: Enable/disable I/O features based on libsox.
- ``BUILD_KALDI``: Enable/disable feature extraction based on Kaldi.
- ``BUILD_RNNT``: Enable/disable custom RNN-T loss function.
- ``USE_FFMPEG``: Enable/disable I/O features based on FFmpeg libraries.
- ``USE_ROCM``: Enable/disable AMD ROCm support.
- ``USE_CUDA``: Enable/disable CUDA support.
For the latest configurations and their default values, please check the source code.
https://github.com/pytorch/audio/blob/main/tools/setup_helpers/extension.py
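For example, a hypothetical build that enables FFmpeg and CUDA support while skipping the Kaldi component could look like this:
.. code-block:: bash
# 1 enables a component, 0 disables it.
USE_FFMPEG=1 USE_CUDA=1 BUILD_KALDI=0 python setup.py develop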
Building on Windows
===================
To build TorchAudio on Windows, we need to enable the C++ compiler and install the build tools and runtime dependencies.
We use Microsoft Visual C++ for compiling C++ and Conda for managing the other build tools and runtime dependencies.
1. Install build tools
----------------------
MSVC
~~~~
Please follow the instructions at https://visualstudio.microsoft.com/downloads/, and make sure to install the C++ development tools.
.. note::
The official binary distributions are compiled with MSVC 2019.
The following section uses paths from MSVC 2019 Community Edition.
Conda
~~~~~
Please follow the instructions at https://docs.conda.io/en/latest/miniconda.html.
2. Start the dev environment
----------------------------
In the following, we need to use the C++ compiler (``cl``) and the Conda package manager (``conda``).
We also use Bash for an experience similar to Linux/macOS.
To do so, the following three steps are required.
1. Open command prompt
2. Enable developer environment
3. [Optional] Launch bash
|
The following combination is known to work.
1. Launch Anaconda3 Command Prompt.
|
.. image:: https://download.pytorch.org/torchaudio/doc-assets/windows-conda.png
:width: 360px
|
Please make sure that the ``conda`` command is recognized.
|
.. image:: https://download.pytorch.org/torchaudio/doc-assets/windows-conda2.png
:width: 360px
|
2. Activate dev tools by running the following command.
We need to use the MSVC x64 toolset for compilation.
To enable the toolset, one can use the ``vcvarsall.bat`` or ``vcvars64.bat`` files, which
are found under Visual Studio's installation folder, under ``VC\Auxiliary\Build\``.
More information is available at https://docs.microsoft.com/en-us/cpp/build/how-to-enable-a-64-bit-visual-cpp-toolset-on-the-command-line?view=msvc-160#use-vcvarsallbat-to-set-a-64-bit-hosted-build-architecture
.. code-block::
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
Please make sure that the ``cl`` command is recognized.
.. image:: https://download.pytorch.org/torchaudio/doc-assets/windows-msvc.png
:width: 360px
3. [Optional] Launch bash with the following command.
If you want a UX similar to Linux/macOS, you can launch Bash. However, please note that in the Bash environment, file paths differ from the native Windows style, and the ``torchaudio.datasets`` module does not work.
.. code-block::
Miniconda3\Library\bin\bash.exe
.. image:: https://download.pytorch.org/torchaudio/doc-assets/windows-bash.png
:width: 360px
3. Install PyTorch
------------------
Please refer to https://pytorch.org/get-started/locally/ for the up-to-date way to install PyTorch.
The following command installs the nightly build version of PyTorch.
.. code-block::
# CPU-only
conda install pytorch cpuonly -c pytorch-nightly
# With CUDA support
conda install pytorch pytorch-cuda=11.7 -c pytorch-nightly -c nvidia
When installing the CUDA-enabled version, the CUDA toolkit is installed as well.
4. [Optional] cuDNN
-------------------
If you intend to build CUDA-related features, please install cuDNN.
Download cuDNN from https://developer.nvidia.com/cudnn, and extract the files into
the same directories as the CUDA toolkit.
When using conda, the directories are ``${CONDA_PREFIX}/bin``, ``${CONDA_PREFIX}/include``, ``${CONDA_PREFIX}/Lib/x64``.
5. Install external dependencies
--------------------------------
.. code-block::
conda install cmake ninja
6. Build TorchAudio
-------------------
Now that we have everything ready, we can build TorchAudio.
.. code-block::
git clone https://github.com/pytorch/audio
cd audio
.. code-block::
# In Command Prompt
python setup.py develop
.. code-block::
# In Bash
python setup.py develop
.. note::
Due to the complexity of the build process, TorchAudio only supports in-place builds.
To use ``pip``, please use the ``--no-use-pep517`` option.
``pip install -v -e . --no-use-pep517``
[Optional] Build TorchAudio with a custom FFmpeg
------------------------------------------------
By default, torchaudio tries to build the FFmpeg extension with support for multiple FFmpeg versions. This process uses pre-built FFmpeg libraries compiled for specific CPU architectures like ``x86_64``.
If your CPU is different, the build process can fail. To work around this, you can disable the FFmpeg integration (by setting the environment variable ``USE_FFMPEG=0``) or switch to the single-version FFmpeg extension.
To build the single-version FFmpeg extension, FFmpeg binaries must be provided by the user and available in the build environment. To do so, install FFmpeg and set the ``FFMPEG_ROOT`` environment variable to the location of FFmpeg.
.. code-block::
conda install -c conda-forge ffmpeg
FFMPEG_ROOT=${CONDA_PREFIX}/Library python setup.py develop
[Optional] Building FFmpeg from source
--------------------------------------
The following section illustrates a way to build FFmpeg libraries from source.
Conda-forge's FFmpeg package comes with support for major codecs and GPU decoders, so regular users and developers do not need to build FFmpeg from source.
If you are not using Conda, then you can either find a pre-built binary distribution or build FFmpeg by yourself.
Also, in case a torchaudio developer needs to update and customize the CI for the FFmpeg build, this section might be helpful.
1. Install MSYS2
~~~~~~~~~~~~~~~~
To build FFmpeg in a way that is usable from the TorchAudio development environment, we need to build binaries native to ``MINGW64``. To do so, we need the tools required by FFmpeg's build process, such as ``pkg-config`` and ``make``, that work in the ``MINGW64`` environment. For this purpose, we use MSYS2.
FFmpeg's official documentation touches on this: https://trac.ffmpeg.org/wiki/CompilationGuide/MinGW
Please follow the instructions at https://www.msys2.org/ to install MSYS2.
.. note::
In CI environments, `Chocolatey <https://chocolatey.org/>`_ can often be used to install MSYS2.
2. Launch MSYS2
~~~~~~~~~~~~~~~
Use the shortcut to launch MSYS2 (MINGW64).
.. image:: https://download.pytorch.org/torchaudio/doc-assets/windows-msys2.png
:width: 360px
.. note::
The Bash environment in MSYS2 does not play well with the Conda env, so do not add the Conda initialization script to the ``~/.bashrc`` of the MSYS2 environment (i.e. ``C:\msys2\home\USER\.bashrc``). Instead, add it to ``C:\Users\USER\.bashrc``.
3. Install build tools
~~~~~~~~~~~~~~~~~~~~~~
.. code-block::
$ pacman -S mingw-w64-x86_64-make
$ pacman -S mingw-w64-x86_64-yasm
After the installation, you should have packages similar to the following:
.. code-block::
$ pacman -Qe
base 2020.12-1
base-devel 2022.01-2
filesystem 2023.01-2
mingw-w64-x86_64-make 4.3-1
mingw-w64-x86_64-pkgconf 1.8.0-2
mingw-w64-x86_64-yasm 1.3.0-4
msys2-runtime 3.4.3-5
4. Build FFmpeg
~~~~~~~~~~~~~~~
Check out the FFmpeg source code.
.. code-block::
git clone https://github.com/ffmpeg/ffmpeg
cd ffmpeg
git checkout <VERSION>
Build:
.. code-block::
./configure --toolchain=msvc
make -j
If the build succeeds, ``ffmpeg.exe`` should be found in the same directory. Make sure that you can run it.
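For example, still inside the MSYS2 (MINGW64) shell:
.. code-block:: bash
# The freshly built binary should run and print its configuration.
./ffmpeg.exe -version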
5. Verify the build
~~~~~~~~~~~~~~~~~~~
Check that the resulting FFmpeg binary is accessible from the Conda env.
Now launch a new command prompt and enable the TorchAudio development environment. Make sure that you can run the ``ffmpeg.exe`` binary generated in the previous step.
......@@ -25,6 +25,7 @@ from datetime import datetime
sys.path.insert(0, os.path.abspath("."))
import pytorch_sphinx_theme
# -- General configuration ------------------------------------------------
......@@ -52,8 +53,20 @@ extensions = [
"sphinxcontrib.bibtex",
"sphinx_gallery.gen_gallery",
"nbsphinx",
"breathe",
]
breathe_projects = {"libtorchaudio": "cpp/xml"}
breathe_default_project = "libtorchaudio"
breathe_projects_source = {
"libtorchaudio": (
"../../torchaudio/csrc/ffmpeg/",
["ffmpeg.h"],
)
}
nbsphinx_requirejs_path = ""
autodoc_member_order = "bysource"
......@@ -114,6 +127,22 @@ def _get_pattern():
return ret
def reset_mpl(gallery_conf, fname):
from sphinx_gallery.scrapers import _reset_matplotlib
_reset_matplotlib(gallery_conf, fname)
import matplotlib
matplotlib.rcParams.update(
{
"image.interpolation": "none",
"figure.figsize": (9.6, 4.8),
"font.size": 8.0,
"axes.axisbelow": True,
}
)
sphinx_gallery_conf = {
"examples_dirs": [
"../../examples/tutorials",
......@@ -126,6 +155,7 @@ sphinx_gallery_conf = {
"promote_jupyter_magic": True,
"first_notebook_cell": None,
"doc_module": ("torchaudio",),
"reset_modules": (reset_mpl, "seaborn"),
}
autosummary_generate = True
......@@ -164,6 +194,15 @@ else:
version = f"Nightly Build ({torchaudio.__version__})"
release = "nightly"
#
# Specify the version of the current stable release.
# Used in `docs/source/_templates/breadcrumbs.html`
#
# https://stackoverflow.com/a/33845358/1106930
#
html_context = {"version_stable": "2.1.1"}
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path
......@@ -201,7 +240,7 @@ html_theme_options = {
"display_version": True,
"logo_only": True,
"navigation_with_keys": True,
"analytics_id": "UA-117752657-2",
"analytics_id": "GTM-T8XT4PS",
}
# Add any paths that contain custom static files (such as style sheets) here,
......
......@@ -26,6 +26,23 @@ Utility
apply_codec
resample
loudness
convolve
fftconvolve
add_noise
preemphasis
deemphasis
speed
frechet_distance
Forced Alignment
----------------
.. autosummary::
:toctree: generated
:nosignatures:
forced_align
merge_tokens
TokenSpan
Filtering
......@@ -73,7 +90,6 @@ Feature Extractions
compute_deltas
detect_pitch_frequency
sliding_window_cmn
compute_kaldi_pitch
spectral_centroid
Multi-channel
......
......@@ -22,6 +22,18 @@ model implementations and application components.
logo
references
.. toctree::
:maxdepth: 2
:caption: Installation
:hidden:
installation
build
build.linux
build.windows
build.jetson
build.ffmpeg
.. toctree::
:maxdepth: 1
:caption: API Tutorials
......@@ -32,12 +44,20 @@ model implementations and application components.
tutorials/streamreader_advanced_tutorial
tutorials/streamwriter_basic_tutorial
tutorials/streamwriter_advanced
hw_acceleration_tutorial
tutorials/nvdec_tutorial
tutorials/nvenc_tutorial
tutorials/effector_tutorial
tutorials/audio_resampling_tutorial
tutorials/audio_data_augmentation_tutorial
tutorials/audio_feature_extractions_tutorial
tutorials/audio_feature_augmentation_tutorial
tutorials/ctc_forced_alignment_api_tutorial
tutorials/oscillator_tutorial
tutorials/additive_synthesis_tutorial
tutorials/filter_design_tutorial
tutorials/subtractive_synthesis_tutorial
tutorials/audio_datasets_tutorial
......@@ -48,12 +68,15 @@ model implementations and application components.
tutorials/speech_recognition_pipeline_tutorial
tutorials/asr_inference_with_ctc_decoder_tutorial
tutorials/asr_inference_with_cuda_ctc_decoder_tutorial
tutorials/online_asr_tutorial
tutorials/device_asr
tutorials/forced_alignment_tutorial
tutorials/forced_alignment_for_multilingual_data_tutorial
tutorials/tacotron2_pipeline_tutorial
tutorials/mvdr_tutorial
tutorials/hybrid_demucs_tutorial
tutorials/squim_tutorial
.. toctree::
:maxdepth: 1
......@@ -64,15 +87,15 @@ model implementations and application components.
Emformer RNN-T ASR <https://github.com/pytorch/audio/tree/main/examples/asr/emformer_rnnt>
Conv-TasNet Source Separation <https://github.com/pytorch/audio/tree/main/examples/source_separation>
HuBERT Pre-training and Fine-tuning (ASR) <https://github.com/pytorch/audio/tree/main/examples/hubert>
Real-time AV-ASR <https://github.com/pytorch/audio/tree/main/examples/avsr>
.. toctree::
:maxdepth: 1
:caption: API Reference
:caption: Python API Reference
:hidden:
torchaudio
io
backend
functional
transforms
datasets
......@@ -102,6 +125,13 @@ Tutorials
.. customcardstart::
.. customcarditem::
:header: AM inference with CUDA CTC Beam Search Decoder
:card_description: Learn how to perform ASR beam search decoding on GPU, using <code>torchaudio.models.decoder.cuda_ctc_decoder</code>.
:image: https://download.pytorch.org/torchaudio/tutorial-assets/thumbnails/asr_inference_with_ctc_decoder_tutorial.png
:link: tutorials/asr_inference_with_cuda_ctc_decoder_tutorial.html
:tags: Pipelines,ASR,CTC-Decoder,CUDA-CTC-Decoder
.. customcarditem::
:header: Loading waveform Tensors from files and saving them
:card_description: Learn how to query/load audio files and save waveform tensors to files, using <code>torchaudio.info</code>, <code>torchaudio.load</code> and <code>torchaudio.save</code> functions.
......@@ -109,6 +139,20 @@ Tutorials
:link: tutorials/audio_io_tutorial.html
:tags: I/O
.. customcarditem::
:header: CTC Forced Alignment API
:card_description: Learn how to use TorchAudio's CTC forced alignment API (<code>torchaudio.functional.forced_align</code>).
:image: https://download.pytorch.org/torchaudio/tutorial-assets/thumbnails/ctc_forced_alignment_api_tutorial.png
:link: tutorials/ctc_forced_alignment_api_tutorial.html
:tags: CTC,Forced-Alignment
.. customcarditem::
:header: Forced alignment for multilingual data
:card_description: Learn how to align multilingual data using TorchAudio's CTC forced alignment API (<code>torchaudio.functional.forced_align</code>) and a multilingual Wav2Vec2 model.
:image: https://download.pytorch.org/torchaudio/tutorial-assets/thumbnails/forced_alignment_for_multilingual_data_tutorial.png
:link: tutorials/forced_alignment_for_multilingual_data_tutorial.html
:tags: Forced-Alignment
.. customcarditem::
:header: Streaming media decoding with StreamReader
:card_description: Learn how to load audio/video to Tensors using <code>torchaudio.io.StreamReader</code> class.
......@@ -129,7 +173,7 @@ Tutorials
:image: https://download.pytorch.org/torchaudio/tutorial-assets/thumbnails/streamwriter_basic_tutorial.gif
:link: tutorials/streamwriter_basic_tutorial.html
:tags: I/O,StreamWriter
.. customcarditem::
:header: Playing media with StreamWriter
:card_description: Learn how to play audio/video with <code>torchaudio.io.StreamWriter</code>.
......@@ -138,11 +182,25 @@ Tutorials
:tags: I/O,StreamWriter
.. customcarditem::
:header: Hardware accelerated video I/O with NVDEC/NVENC
:card_description: Learn how to setup and use HW accelerated video I/O.
:header: Hardware accelerated video decoding with NVDEC
:card_description: Learn how to use the HW video decoder.
:image: https://download.pytorch.org/torchaudio/tutorial-assets/thumbnails/hw_acceleration_tutorial.png
:link: hw_acceleration_tutorial.html
:tags: I/O,StreamReader,StreamWriter
:link: tutorials/nvdec_tutorial.html
:tags: I/O,StreamReader
.. customcarditem::
:header: Hardware accelerated video encoding with NVENC
:card_description: Learn how to use the HW video encoder.
:image: https://download.pytorch.org/torchaudio/tutorial-assets/thumbnails/hw_acceleration_tutorial.png
:link: tutorials/nvenc_tutorial.html
:tags: I/O,StreamWriter
.. customcarditem::
:header: Apply effects and codecs to waveform
:card_description: Learn how to apply effects and codecs to waveform using <code>torchaudio.io.AudioEffector</code>.
:image: https://download.pytorch.org/torchaudio/tutorial-assets/thumbnails/effector_tutorial.png
:link: tutorials/effector_tutorial.html
:tags: Preprocessing
.. customcarditem::
:header: Audio resampling with bandlimited sinc interpolation
......@@ -199,7 +257,7 @@ Tutorials
:image: https://download.pytorch.org/torchaudio/tutorial-assets/thumbnails/online_asr_tutorial.gif
:link: tutorials/online_asr_tutorial.html
:tags: Pipelines,ASR,RNNT,StreamReader
.. customcarditem::
:header: Real-time microphone ASR with Emformer RNN-T
:card_description: Learn how to transcribe speech from a microphone with Emformer RNN-T (<code>torchaudio.pipelines.RNNTBundle</code>) and <code>torchaudio.io.StreamReader</code>.
......@@ -220,7 +278,7 @@ Tutorials
:image: https://download.pytorch.org/torchaudio/tutorial-assets/thumbnails/tacotron2_pipeline_tutorial.png
:link: tutorials/tacotron2_pipeline_tutorial.html
:tags: Pipelines,TTS-(Text-to-Speech)
.. customcarditem::
:header: Speech Enhancement with MVDR Beamforming
:card_description: Learn how to improve speech quality with MVDR Beamforming.
......@@ -235,6 +293,12 @@ Tutorials
:link: tutorials/hybrid_demucs_tutorial.html
:tags: Pipelines,Source-Separation
.. customcarditem::
:header: Torchaudio-Squim: Non-intrusive Speech Assessment in TorchAudio
:card_description: Learn how to estimate subjective and objective metrics with pre-trained TorchAudio-SQUIM models (<code>torchaudio.pipelines.SQUIMObjective</code>).
:image: https://download.pytorch.org/torchaudio/tutorial-assets/thumbnails/squim_tutorial.png
:link: tutorials/squim_tutorial.html
:tags: Pipelines,Speech Assessment,Speech Enhancement
.. customcardend::
......@@ -267,3 +331,14 @@ In BibTeX format:
journal={arXiv preprint arXiv:2110.15018},
year={2021}
}
.. code-block:: bibtex
@misc{hwang2023torchaudio,
title={TorchAudio 2.1: Advancing speech recognition, self-supervised learning, and audio processing components for PyTorch},
author={Jeff Hwang and Moto Hira and Caroline Chen and Xiaohui Zhang and Zhaoheng Ni and Guangzhi Sun and Pingchuan Ma and Ruizhe Huang and Vineel Pratap and Yuekai Zhang and Anurag Kumar and Chin-Yun Yu and Chuang Zhu and Chunxi Liu and Jacob Kahn and Mirco Ravanelli and Peng Sun and Shinji Watanabe and Yangyang Shi and Yumeng Tao and Robin Scheibler and Samuele Cornell and Sean Kim and Stavros Petridis},
year={2023},
eprint={2310.17864},
archivePrefix={arXiv},
primaryClass={eess.AS}
}
Installing pre-built binaries
=============================
``torchaudio`` has binary distributions for PyPI (``pip``) and Anaconda (``conda``).
Please refer to https://pytorch.org/get-started/locally/ for the details.
.. note::
Each ``torchaudio`` package is compiled against a specific version of ``torch``.
Please refer to the following table and install the correct pair of ``torch`` and ``torchaudio``.
.. note::
Starting with ``0.10``, torchaudio provides CPU-only and CUDA-enabled binary distributions,
each of which requires a corresponding PyTorch distribution.
.. note::
This software was compiled against unmodified copies of FFmpeg, with the specific rpath removed so as to enable the use of system libraries. The LGPL source can be downloaded from the following locations: `n4.4.4 <https://github.com/FFmpeg/FFmpeg/releases/tag/n4.4.4>`__ (`license <https://github.com/FFmpeg/FFmpeg/blob/n4.4.4/COPYING.LGPLv2.1>`__), `n5.0.3 <https://github.com/FFmpeg/FFmpeg/releases/tag/n5.0.3>`__ (`license <https://github.com/FFmpeg/FFmpeg/blob/n5.0.3/COPYING.LGPLv2.1>`__) and `n6.0 <https://github.com/FFmpeg/FFmpeg/releases/tag/n6.0>`__ (`license <https://github.com/FFmpeg/FFmpeg/blob/n6.0/COPYING.LGPLv2.1>`__).
Dependencies
------------
* `PyTorch <https://pytorch.org>`_
Please refer to the compatibility matrix below for supported PyTorch versions.
.. _optional_dependencies:
Optional Dependencies
~~~~~~~~~~~~~~~~~~~~~
.. _ffmpeg_dependency:
* `FFmpeg <https://ffmpeg.org>`__
Required to use the :py:mod:`torchaudio.io` module and ``backend="ffmpeg"`` in
`I/O functions <./torchaudio.html#i-o>`__.

Starting with version 2.1, the official TorchAudio binary distributions are compatible
with FFmpeg versions 6, 5, and 4 (>=4.4, <7). At runtime, TorchAudio first looks for
FFmpeg 6; if that is not found, it falls back to 5 and then to 4.
There are multiple ways to install FFmpeg libraries.
Please refer to the official documentation for how to install FFmpeg.
If you are using the Anaconda Python distribution,
``conda install -c conda-forge 'ffmpeg<7'`` will install
compatible FFmpeg libraries.
If you need to control which version of FFmpeg TorchAudio searches for and links
against, you can specify it via the environment variable
``TORCHAUDIO_USE_FFMPEG_VERSION``. For example, setting
``TORCHAUDIO_USE_FFMPEG_VERSION=5`` makes TorchAudio look only for FFmpeg 5.
If this search mechanism causes an issue for some reason, you can disable the
FFmpeg integration entirely by setting the environment variable
``TORCHAUDIO_USE_FFMPEG=0`` (a short sketch of both variables follows the note below).
.. note::
When searching for an FFmpeg installation, TorchAudio looks for library files
whose names include version numbers,
i.e. ``libavutil.so.<VERSION>`` on Linux, ``libavutil.<VERSION>.dylib``
on macOS, and ``avutil-<VERSION>.dll`` on Windows.
Many public pre-built binaries follow this naming scheme, but some distributions
ship unversioned file names.
If TorchAudio fails to detect FFmpeg, double-check that the library files you
installed follow this naming scheme, and that they are located in one of the
directories on the library search path.
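As a minimal sketch of the environment variables above (an assumption here is
that they are read when TorchAudio initializes its FFmpeg extension, so they
should be set before ``torchaudio`` is imported):

.. code-block:: python

   import os

   # Pin the search to FFmpeg 5 only (assumption: the variable is read at
   # FFmpeg-extension initialization, hence set it before the import).
   os.environ["TORCHAUDIO_USE_FFMPEG_VERSION"] = "5"

   # Alternatively, disable the FFmpeg integration entirely:
   # os.environ["TORCHAUDIO_USE_FFMPEG"] = "0"

   import torchaudio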
* `SoX <https://sox.sourceforge.net/>`__
Required to use ``backend="sox"`` in `I/O functions <./torchaudio.html#i-o>`__.
Starting with version 2.1, TorchAudio requires libsox to be installed separately.
If dynamic linking is causing an issue, you can set the environment variable
``TORCHAUDIO_USE_SOX=0``, and TorchAudio won't use SoX.
.. note::
TorchAudio looks for a library file with an unversioned name, that is, ``libsox.so``
on Linux and ``libsox.dylib`` on macOS. Some package managers install the library
file under a different name. For example, aptitude on Ubuntu installs ``libsox.so.3``.
To have TorchAudio link against it, you can create a symbolic link to it named
``libsox.so`` and put the symlink in a directory on the library search path
(a sketch follows this note).
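A hedged sketch of creating such a symlink (the paths below are illustrative
and distribution-specific; adjust them to your system, and note that writing
to system library directories usually requires elevated privileges):

.. code-block:: python

   import os

   # Illustrative Ubuntu paths; verify where your package manager put libsox.
   versioned = "/usr/lib/x86_64-linux-gnu/libsox.so.3"
   alias = "/usr/lib/x86_64-linux-gnu/libsox.so"

   if os.path.exists(versioned) and not os.path.exists(alias):
       os.symlink(versioned, alias)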
.. note::
TorchAudio is tested against libsox 14.4.2; it is unlikely that other
versions would work.
* `SoundFile <https://pysoundfile.readthedocs.io/>`__
Required to use ``backend="soundfile"`` in `I/O functions <./torchaudio.html#i-o>`__.
* `sentencepiece <https://pypi.org/project/sentencepiece/>`__
Required for performing automatic speech recognition with :ref:`Emformer RNN-T<RNNT>`.
You can install it by running ``pip install sentencepiece``.
* `deep-phonemizer <https://pypi.org/project/deep-phonemizer/>`__
Required for performing text-to-speech with :ref:`Tacotron2`.
* `kaldi_io <https://pypi.org/project/kaldi-io/>`__
Required to use :py:mod:`torchaudio.kaldi_io` module.
Compatibility Matrix
--------------------
The official binary distributions of TorchAudio contain extension modules
which are written in C++ and linked against specific versions of PyTorch.
TorchAudio and PyTorch from different releases cannot be used together.
Please refer to the following table for the matching versions.
.. list-table::
:header-rows: 1
* - ``PyTorch``
- ``TorchAudio``
- ``Python``
* - ``2.1.0``
- ``2.1.0``
- ``>=3.8``, ``<=3.11``
* - ``2.0.1``
- ``2.0.2``
- ``>=3.8``, ``<=3.11``
* - ``2.0.0``
- ``2.0.1``
- ``>=3.8``, ``<=3.11``
* - ``1.13.1``
- ``0.13.1``
- ``>=3.7``, ``<=3.10``
* - ``1.13.0``
- ``0.13.0``
- ``>=3.7``, ``<=3.10``
* - ``1.12.1``
- ``0.12.1``
- ``>=3.7``, ``<=3.10``
* - ``1.12.0``
- ``0.12.0``
- ``>=3.7``, ``<=3.10``
* - ``1.11.0``
- ``0.11.0``
- ``>=3.7``, ``<=3.9``
* - ``1.10.0``
- ``0.10.0``
- ``>=3.6``, ``<=3.9``
* - ``1.9.1``
- ``0.9.1``
- ``>=3.6``, ``<=3.9``
* - ``1.8.1``
- ``0.8.1``
- ``>=3.6``, ``<=3.9``
* - ``1.7.1``
- ``0.7.2``
- ``>=3.6``, ``<=3.9``
* - ``1.7.0``
- ``0.7.0``
- ``>=3.6``, ``<=3.8``
* - ``1.6.0``
- ``0.6.0``
- ``>=3.6``, ``<=3.8``
* - ``1.5.0``
- ``0.5.0``
- ``>=3.5``, ``<=3.8``
* - ``1.4.0``
- ``0.4.0``
- ``==2.7``, ``>=3.5``, ``<=3.8``
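To check an installed pair against this matrix, a quick sketch (both packages
expose standard ``__version__`` attributes):

.. code-block:: python

   import torch
   import torchaudio

   # e.g. torch 2.1.0 should pair with torchaudio 2.1.0 per the table above.
   print(f"torch {torch.__version__} / torchaudio {torchaudio.__version__}")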
......@@ -12,6 +12,8 @@ torchaudio.io
StreamReader
StreamWriter
AudioEffector
play_audio
.. rubric:: Tutorials using ``torchaudio.io``
......
......@@ -20,3 +20,19 @@ CTC Decoder
.. rubric:: Tutorials using CTC Decoder
.. minigallery:: torchaudio.models.decoder.CTCDecoder
CUDA CTC Decoder
----------------
.. autosummary::
:toctree: generated
:nosignatures:
:template: autosummary/cuda_ctc_decoder_class.rst
CUCTCDecoder
cuda_ctc_decoder
.. rubric:: Tutorials using CUDA CTC Decoder
.. minigallery:: torchaudio.models.decoder.CUCTCDecoder
......@@ -7,15 +7,13 @@ torchaudio.models
The ``torchaudio.models`` subpackage contains definitions of models for addressing common audio tasks.

Model Definitions
-----------------

.. note::
   For models with pre-trained parameters, please refer to the :mod:`torchaudio.pipelines` module.
Model definitions are responsible for constructing computation graphs and executing them.
Some models have complex structure and variations.
For such models, factory functions are provided.
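For instance, a sketch using one of the factory functions listed below (the
model is untrained here; its weights are randomly initialized):

.. code-block:: python

   import torch
   from torchaudio.models import wav2vec2_base

   # Build a Wav2Vec2Model with the "base" configuration.
   model = wav2vec2_base(aux_num_out=None)

   waveform = torch.randn(1, 16000)  # one second of dummy audio at 16 kHz
   features, lengths = model.extract_features(waveform)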
.. autosummary::
:toctree: generated
......@@ -30,42 +28,9 @@ For such models, `Factory Functions`_ are provided.
HuBERTPretrainModel
RNNT
RNNTBeamSearch
SquimObjective
SquimSubjective
Tacotron2
Wav2Letter
Wav2Vec2Model
WaveRNN
Factory Functions
-----------------
.. autosummary::
:toctree: generated
:nosignatures:
conv_tasnet_base
emformer_rnnt_model
emformer_rnnt_base
wav2vec2_model
wav2vec2_base
wav2vec2_large
wav2vec2_large_lv60k
hubert_base
hubert_large
hubert_xlarge
hubert_pretrain_model
hubert_pretrain_base
hubert_pretrain_large
hubert_pretrain_xlarge
hdemucs_low
hdemucs_medium
hdemucs_high
Utility Functions
-----------------
.. autosummary::
:toctree: generated
:nosignatures:
~wav2vec2.utils.import_fairseq_model
~wav2vec2.utils.import_huggingface_model