Swap decoder/encoder implementation

Differential Revision: D50677606
Pull Request resolved: https://github.com/pytorch/audio/pull/3681

Swap decoder/encoder implementation
Differential Revision: D50677606
Pull Request resolved: https://github.com/pytorch/audio/pull/3681
36f5010b · moto-meta · GitHub · 2a0f4c06 · 36f5010b · 36f5010b
Unverified Commit 36f5010b authored Oct 26, 2023 by moto-meta Committed by GitHub Oct 26, 2023
14 changed files
--- a/cmake/TorchAudioHelper.cmake
+++ b/cmake/TorchAudioHelper.cmake
@@ -43,7 +43,7 @@ endfunction()
 function(torio_library name source include_dirs link_libraries compile_defs)
  _library(
-    torchaudio/lib
+    torio/lib
    "${name}"
    "${source}"
    "${include_dirs}"
@@ -104,7 +104,7 @@ if (BUILD_TORCHAUDIO_PYTHON_EXTENSION)
  endfunction()
  function(torio_extension name sources include_dirs libraries definitions)
    _extension(
-      torchaudio/lib
+      torio/lib
      "${name}"
      "${sources}"
      "${include_dirs}"

--- a/docs/source/_templates/autosummary/io_class.rst
+++ b/docs/source/_templates/autosummary/io_class.rst
@@ -73,7 +73,7 @@ Support Structures
 {{ item | underline("~") }}
-.. autoclass:: torchaudio.io._stream_reader.{{item}}()
+.. autoclass:: torio.io._streaming_media_decoder.{{item}}()
   :members:
 {%- endfor %}

--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -385,6 +385,13 @@ def inject_minigalleries(app, what, name, obj, options, lines):
 def setup(app):
    app.connect("autodoc-process-docstring", inject_minigalleries)
+    from torchaudio.io import StreamReader, StreamWriter
+    # need to assign the names here, otherwise autodoc won't document these classes,
+    # and will instead just say 'alias of ...'
+    StreamReader.__name__ = "StreamReader"
+    StreamWriter.__name__ = "StreamWriter"
 from custom_directives import CustomCardEnd, CustomCardItem, CustomCardStart, SupportedDevices, SupportedProperties

--- a/src/torchaudio/_backend/utils.py
+++ b/src/torchaudio/_backend/utils.py
@@ -4,8 +4,9 @@ from typing import BinaryIO, Dict, Optional, Tuple, Type, Union
 import torch
-from torchaudio._extension import lazy_import_ffmpeg_ext, lazy_import_sox_ext
+from torchaudio._extension import lazy_import_sox_ext
 from torchaudio.io import CodecConfig
+from torio._extension import lazy_import_ffmpeg_ext
 from . import soundfile_backend

--- a/src/torchaudio/_extension/__init__.py
+++ b/src/torchaudio/_extension/__init__.py
@@ -4,7 +4,7 @@ import sys
 from torchaudio._internal.module_utils import fail_with_message, is_module_available, no_op
-from .utils import _check_cuda_version, _init_dll_path, _init_ffmpeg, _init_sox, _LazyImporter, _load_lib
+from .utils import _check_cuda_version, _init_dll_path, _init_sox, _LazyImporter, _load_lib
 _LG = logging.getLogger(__name__)
@@ -18,7 +18,6 @@ __all__ = [
    "_IS_TORCHAUDIO_EXT_AVAILABLE",
    "_IS_RIR_AVAILABLE",
    "lazy_import_sox_ext",
-    "lazy_import_ffmpeg_ext",
 ]
@@ -57,18 +56,6 @@ def lazy_import_sox_ext():
    return _SOX_EXT
-_FFMPEG_EXT = None
-def lazy_import_ffmpeg_ext():
-    """Load FFmpeg integration based on availability in lazy manner"""
-    global _FFMPEG_EXT
-    if _FFMPEG_EXT is None:
-        _FFMPEG_EXT = _LazyImporter("_torchaudio_ffmpeg", _init_ffmpeg)
-    return _FFMPEG_EXT
 fail_if_no_rir = (
    no_op
    if _IS_RIR_AVAILABLE

--- a/src/torchaudio/_extension/utils.py
+++ b/src/torchaudio/_extension/utils.py
@@ -4,7 +4,6 @@ The implementations here should be stateless.
 They should not depend on external state.
 Anything that depends on external state should happen in __init__.py
 """
 import importlib
 import logging
 import os
@@ -107,57 +106,6 @@ def _init_sox():
    return ext
-_FFMPEG_VERS = ["6", "5", "4", ""]
-def _find_versionsed_ffmpeg_extension(version: str):
-    _LG.debug("Attempting to load FFmpeg%s", version)
-    ext = f"torchaudio.lib._torio_ffmpeg{version}"
-    lib = f"libtorio_ffmpeg{version}"
-    if not importlib.util.find_spec(ext):
-        raise RuntimeError(f"FFmpeg{version} extension is not available.")
-    _load_lib(lib)
-    return importlib.import_module(ext)
-def _find_ffmpeg_extension(ffmpeg_vers):
-    for ffmpeg_ver in ffmpeg_vers:
-        try:
-            return _find_versionsed_ffmpeg_extension(ffmpeg_ver)
-        except Exception:
-            _LG.debug("Failed to load FFmpeg%s extension.", ffmpeg_ver, exc_info=True)
-            continue
-    raise ImportError(
-        f"Failed to intialize FFmpeg extension. Tried versions: {ffmpeg_vers}. "
-        "Enable DEBUG logging to see more details about the error."
-    )
-def _get_ffmpeg_versions():
-    ffmpeg_vers = _FFMPEG_VERS
-    # User override
-    if (ffmpeg_ver := os.environ.get("TORCHAUDIO_USE_FFMPEG_VERSION")) is not None:
-        if ffmpeg_ver not in ffmpeg_vers:
-            raise ValueError(
-                f"The FFmpeg version '{ffmpeg_ver}' (read from TORCHAUDIO_USE_FFMPEG_VERSION) "
-                f"is not one of supported values. Possible values are {ffmpeg_vers}"
-            )
-        ffmpeg_vers = [ffmpeg_ver]
-    return ffmpeg_vers
-def _init_ffmpeg():
-    ffmpeg_vers = _get_ffmpeg_versions()
-    ext = _find_ffmpeg_extension(ffmpeg_vers)
-    ext.init()
-    if ext.get_log_level() > 8:
-        ext.set_log_level(8)
-    return ext
 class _LazyImporter(types.ModuleType):
    """Lazily import module/extension."""

--- a/src/torchaudio/io/__init__.py
+++ b/src/torchaudio/io/__init__.py
+from torio.io import CodecConfig, StreamingMediaDecoder as StreamReader, StreamingMediaEncoder as StreamWriter
 from ._effector import AudioEffector
 from ._playback import play_audio
-from ._stream_reader import StreamReader
-from ._stream_writer import CodecConfig, StreamWriter
 __all__ = [

--- a/src/torchaudio/io/_effector.py
+++ b/src/torchaudio/io/_effector.py
@@ -4,8 +4,8 @@ from typing import Iterator, List, Optional
 import torch
 from torch import Tensor
-from ._stream_reader import _get_afilter_desc, StreamReader
+from torio.io._streaming_media_decoder import _get_afilter_desc, StreamingMediaDecoder as StreamReader
-from ._stream_writer import CodecConfig, StreamWriter
+from torio.io._streaming_media_encoder import CodecConfig, StreamingMediaEncoder as StreamWriter
 class _StreamingIOBuffer:

--- a/src/torchaudio/utils/__init__.py
+++ b/src/torchaudio/utils/__init__.py
-from . import ffmpeg_utils, sox_utils
+from torio.utils import ffmpeg_utils
+from . import sox_utils
 from .download import download_asset

--- a/src/torchaudio/utils/ffmpeg_utils.py
+++ b/src/torchaudio/utils/ffmpeg_utils.py
-"""Module to change the configuration of FFmpeg libraries (such as libavformat).
+def __getattr__(item):
+    from torio.utils import ffmpeg_utils
-It affects functionalities in :py:mod:`torchaudio.io` (and indirectly :py:func:`torchaudio.load`).
+    return getattr(ffmpeg_utils, item)
-"""
-from typing import Dict, List, Tuple
-import torchaudio
-ffmpeg_ext = torchaudio._extension.lazy_import_ffmpeg_ext()
-def get_versions() -> Dict[str, Tuple[int]]:
-    """Get the versions of FFmpeg libraries
-    Returns:
-        dict: mapping from library names to version string,
-            i.e. `"libavutil": (56, 22, 100)`.
-    """
-    return ffmpeg_ext.get_versions()
-def get_log_level() -> int:
-    """Get the log level of FFmpeg.
-    See :py:func:`set_log_level` for the detailo.
-    """
-    return ffmpeg_ext.get_log_level()
-def set_log_level(level: int):
-    """Set the log level of FFmpeg (libavformat etc)
-    Arguments:
-        level (int): Log level. The larger, the more verbose.
-            The following values are common values, the corresponding ``ffmpeg``'s
-            ``-loglevel`` option value and desription.
-                * ``-8`` (``quiet``):
-                  Print no output.
-                * ``0`` (``panic``):
-                  Something went really wrong and we will crash now.
-                * ``8`` (``fatal``):
-                  Something went wrong and recovery is not possible.
-                  For example, no header was found for a format which depends
-                  on headers or an illegal combination of parameters is used.
-                * ``16`` (``error``):
-                  Something went wrong and cannot losslessly be recovered.
-                  However, not all future data is affected.
-                * ``24`` (``warning``):
-                  Something somehow does not look correct.
-                  This may or may not lead to problems.
-                * ``32`` (``info``):
-                  Standard information.
-                * ``40`` (``verbose``):
-                  Detailed information.
-                * ``48`` (``debug``):
-                  Stuff which is only useful for libav* developers.
-                * ``56`` (``trace``):
-                  Extremely verbose debugging, useful for libav* development.
-    """
-    ffmpeg_ext.set_log_level(level)
-def get_demuxers() -> Dict[str, str]:
-    """Get the available demuxers.
-    Returns:
-        Dict[str, str]: Mapping from demuxer (format) short name to long name.
-    Example
-        >>> for k, v in get_demuxers().items():
-        >>>     print(f"{k}: {v}")
-        ... aa: Audible AA format files
-        ... aac: raw ADTS AAC (Advanced Audio Coding)
-        ... aax: CRI AAX
-        ... ac3: raw AC-3
-    """
-    return ffmpeg_ext.get_demuxers()
-def get_muxers() -> Dict[str, str]:
-    """Get the available muxers.
-    Returns:
-        Dict[str, str]: Mapping from muxer (format) short name to long name.
-    Example
-        >>> for k, v in get_muxers().items():
-        >>>     print(f"{k}: {v}")
-        ... a64: a64 - video for Commodore 64
-        ... ac3: raw AC-3
-        ... adts: ADTS AAC (Advanced Audio Coding)
-        ... adx: CRI ADX
-        ... aiff: Audio IFF
-    """
-    return ffmpeg_ext.get_muxers()
-def get_audio_decoders() -> Dict[str, str]:
-    """Get the available audio decoders.
-    Returns:
-        Dict[str, str]: Mapping from decoder short name to long name.
-    Example
-        >>> for k, v in get_audio_decoders().items():
-        >>>     print(f"{k}: {v}")
-        ... a64: a64 - video for Commodore 64
-        ... ac3: raw AC-3
-        ... adts: ADTS AAC (Advanced Audio Coding)
-        ... adx: CRI ADX
-        ... aiff: Audio IFF
-    """
-    return ffmpeg_ext.get_audio_decoders()
-def get_audio_encoders() -> Dict[str, str]:
-    """Get the available audio encoders.
-    Returns:
-        Dict[str, str]: Mapping from encoder short name to long name.
-    Example
-        >>> for k, v in get_audio_encoders().items():
-        >>>     print(f"{k}: {v}")
-        ... comfortnoise: RFC 3389 comfort noise generator
-        ... s302m: SMPTE 302M
-        ... aac: AAC (Advanced Audio Coding)
-        ... ac3: ATSC A/52A (AC-3)
-        ... ac3_fixed: ATSC A/52A (AC-3)
-        ... alac: ALAC (Apple Lossless Audio Codec)
-    """
-    return ffmpeg_ext.get_audio_encoders()
-def get_video_decoders() -> Dict[str, str]:
-    """Get the available video decoders.
-    Returns:
-        Dict[str, str]: Mapping from decoder short name to long name.
-    Example
-        >>> for k, v in get_video_decoders().items():
-        >>>     print(f"{k}: {v}")
-        ... aasc: Autodesk RLE
-        ... aic: Apple Intermediate Codec
-        ... alias_pix: Alias/Wavefront PIX image
-        ... agm: Amuse Graphics Movie
-        ... amv: AMV Video
-        ... anm: Deluxe Paint Animation
-    """
-    return ffmpeg_ext.get_video_decoders()
-def get_video_encoders() -> Dict[str, str]:
-    """Get the available video encoders.
-    Returns:
-        Dict[str, str]: Mapping from encoder short name to long name.
-    Example
-        >>> for k, v in get_audio_encoders().items():
-        >>>     print(f"{k}: {v}")
-        ... a64multi: Multicolor charset for Commodore 64
-        ... a64multi5: Multicolor charset for Commodore 64, extended with 5th color (colram)
-        ... alias_pix: Alias/Wavefront PIX image
-        ... amv: AMV Video
-        ... apng: APNG (Animated Portable Network Graphics) image
-        ... asv1: ASUS V1
-        ... asv2: ASUS V2
-    """
-    return ffmpeg_ext.get_video_encoders()
-def get_input_devices() -> Dict[str, str]:
-    """Get the available input devices.
-    Returns:
-        Dict[str, str]: Mapping from device short name to long name.
-    Example
-        >>> for k, v in get_input_devices().items():
-        >>>     print(f"{k}: {v}")
-        ... avfoundation: AVFoundation input device
-        ... lavfi: Libavfilter virtual input device
-    """
-    return ffmpeg_ext.get_input_devices()
-def get_output_devices() -> Dict[str, str]:
-    """Get the available output devices.
-    Returns:
-        Dict[str, str]: Mapping from device short name to long name.
-    Example
-        >>> for k, v in get_output_devices().items():
-        >>>     print(f"{k}: {v}")
-        ... audiotoolbox: AudioToolbox output device
-    """
-    return ffmpeg_ext.get_output_devices()
-def get_input_protocols() -> List[str]:
-    """Get the supported input protocols.
-    Returns:
-        List[str]: The names of supported input protocols
-    Example
-        >>> print(get_input_protocols())
-        ... ['file', 'ftp', 'hls', 'http','https', 'pipe', 'rtmp', 'tcp', 'tls', 'udp', 'unix']
-    """
-    return ffmpeg_ext.get_input_protocols()
-def get_output_protocols() -> List[str]:
-    """Get the supported output protocols.
-    Returns:
-        list of str: The names of supported output protocols
-    Example
-        >>> print(get_output_protocols())
-        ... ['file', 'ftp', 'http', 'https', 'md5', 'pipe', 'prompeg', 'rtmp', 'tee', 'tcp', 'tls', 'udp', 'unix']
-    """
-    return ffmpeg_ext.get_output_protocols()
-def get_build_config() -> str:
-    """Get the FFmpeg build configuration
-    Returns:
-        str: Build configuration string.
-    Example
-        >>> print(get_build_config())
-        --prefix=/Users/runner/miniforge3 --cc=arm64-apple-darwin20.0.0-clang --enable-gpl --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-neon --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-libvpx --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1646229390493/_build_env/bin/pkg-config --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1646229390493/_build_env/bin/x86_64-apple-darwin13.4.0-clang  # noqa
-    """
-    return ffmpeg_ext.get_build_config()
-def clear_cuda_context_cache():
-    """Clear the CUDA context used by CUDA Hardware accelerated video decoding"""
-    ffmpeg_ext.clear_cuda_context_cache()
--- a/src/torio/utils/ffmpeg_utils.py
+++ b/src/torio/utils/ffmpeg_utils.py
 """Module to change the configuration of FFmpeg libraries (such as libavformat).
-It affects functionalities in :py:mod:`torio.io`.
 """
 from typing import Dict, List, Tuple
@@ -22,7 +20,7 @@ def get_versions() -> Dict[str, Tuple[int]]:
 def get_log_level() -> int:
    """Get the log level of FFmpeg.
-    See :py:func:`set_log_level` for the detailo.
+    See :py:func:`set_log_level` for the detail.
    """
    return ffmpeg_ext.get_log_level()

--- a/test/torchaudio_unittest/common_utils/case_utils.py
+++ b/test/torchaudio_unittest/common_utils/case_utils.py
@@ -10,6 +10,7 @@ from itertools import zip_longest
 import torch
 import torchaudio
+import torio
 from torch.testing._internal.common_utils import TestCase as PytorchTestCase
 from torchaudio._internal.module_utils import eval_env, is_module_available
 from torchaudio.utils.ffmpeg_utils import get_video_decoders, get_video_encoders
@@ -111,7 +112,7 @@ class TorchaudioTestCase(TestBaseMixin, PytorchTestCase):
    pass
-_IS_FFMPEG_AVAILABLE = torchaudio._extension.lazy_import_ffmpeg_ext().is_available()
+_IS_FFMPEG_AVAILABLE = torio._extension.lazy_import_ffmpeg_ext().is_available()
 _IS_SOX_AVAILABLE = torchaudio._extension.lazy_import_sox_ext().is_available()
 _IS_CTC_DECODER_AVAILABLE = None
 _IS_CUDA_CTC_DECODER_AVAILABLE = None

--- a/test/torchaudio_unittest/io/stream_reader_test.py
+++ b/test/torchaudio_unittest/io/stream_reader_test.py
@@ -5,14 +5,6 @@ import torchaudio
 from parameterized import parameterized, parameterized_class
 from torchaudio.io import StreamReader, StreamWriter
-from torchaudio.io._stream_reader import (
-    ChunkTensor,
-    OutputAudioStream,
-    OutputVideoStream,
-    SourceAudioStream,
-    SourceStream,
-    SourceVideoStream,
-)
 from torchaudio_unittest.common_utils import (
    disabledInCI,
    get_asset_path,
@@ -29,6 +21,14 @@ from torchaudio_unittest.common_utils import (
    TempDirMixin,
    TorchaudioTestCase,
 )
+from torio.io._streaming_media_decoder import (
+    ChunkTensor,
+    OutputAudioStream,
+    OutputVideoStream,
+    SourceAudioStream,
+    SourceStream,
+    SourceVideoStream,
+)
 @skipIfNoFFmpeg

--- a/tools/setup_helpers/extension.py
+++ b/tools/setup_helpers/extension.py
@@ -70,19 +70,19 @@ def get_ext_modules():
            # single version ffmpeg mode
            modules.extend(
                [
-                    Extension(name="torchaudio.lib.libtorio_ffmpeg", sources=[]),
+                    Extension(name="torio.lib.libtorio_ffmpeg", sources=[]),
-                    Extension(name="torchaudio.lib._torio_ffmpeg", sources=[]),
+                    Extension(name="torio.lib._torio_ffmpeg", sources=[]),
                ]
            )
        else:
            modules.extend(
                [
-                    Extension(name="torchaudio.lib.libtorio_ffmpeg4", sources=[]),
+                    Extension(name="torio.lib.libtorio_ffmpeg4", sources=[]),
-                    Extension(name="torchaudio.lib._torio_ffmpeg4", sources=[]),
+                    Extension(name="torio.lib._torio_ffmpeg4", sources=[]),
-                    Extension(name="torchaudio.lib.libtorio_ffmpeg5", sources=[]),
+                    Extension(name="torio.lib.libtorio_ffmpeg5", sources=[]),
-                    Extension(name="torchaudio.lib._torio_ffmpeg5", sources=[]),
+                    Extension(name="torio.lib._torio_ffmpeg5", sources=[]),
-                    Extension(name="torchaudio.lib.libtorio_ffmpeg6", sources=[]),
+                    Extension(name="torio.lib.libtorio_ffmpeg6", sources=[]),
-                    Extension(name="torchaudio.lib._torio_ffmpeg6", sources=[]),
+                    Extension(name="torio.lib._torio_ffmpeg6", sources=[]),
                ]
            )
    return modules