Simplify the logic to initialize FFmpeg

Differential Revision: D50193749
Pull Request resolved: https://github.com/pytorch/audio/pull/3650

Simplify the logic to initialize FFmpeg
Differential Revision: D50193749
Pull Request resolved: https://github.com/pytorch/audio/pull/3650
f62367a6 · moto-meta · GitHub · d947dee0 · f62367a6 · f62367a6
Unverified Commit f62367a6 authored Oct 12, 2023 by moto-meta Committed by GitHub Oct 12, 2023
14 changed files
--- a/src/libtorchaudio/pybind/pybind.cpp
+++ b/src/libtorchaudio/pybind/pybind.cpp
@@ -8,7 +8,6 @@ PYBIND11_MODULE(_torchaudio, m) {
  m.def("is_rir_available", &is_rir_available, "");
  m.def("is_align_available", &is_align_available, "");
  m.def("cuda_version", &cuda_version, "");
-  m.def("find_avutil", &find_avutil, "");
 }
 } // namespace

--- a/src/libtorchaudio/utils.cpp
+++ b/src/libtorchaudio/utils.cpp
@@ -31,10 +31,4 @@ c10::optional<int64_t> cuda_version() {
 #endif
 }
-int find_avutil(const char* name) {
-  auto lib = at::DynamicLibrary{name};
-  auto avutil_version = (unsigned (*)())(lib.sym("avutil_version"));
-  return static_cast<int>(avutil_version() >> 16);
-}
 } // namespace torchaudio
--- a/src/libtorchaudio/utils.h
+++ b/src/libtorchaudio/utils.h
@@ -5,5 +5,4 @@ namespace torchaudio {
 bool is_rir_available();
 bool is_align_available();
 c10::optional<int64_t> cuda_version();
-int find_avutil(const char* name);
 } // namespace torchaudio
--- a/src/torchaudio/_backend/utils.py
+++ b/src/torchaudio/_backend/utils.py
@@ -4,7 +4,7 @@ from typing import BinaryIO, Dict, Optional, Tuple, Type, Union
 import torch
-from torchaudio._extension import _FFMPEG_EXT, _SOX_INITIALIZED
+from torchaudio._extension import _SOX_INITIALIZED, lazy_import_ffmpeg_ext
 from . import soundfile_backend
@@ -18,7 +18,7 @@ from .sox import SoXBackend
 @lru_cache(None)
 def get_available_backends() -> Dict[str, Type[Backend]]:
    backend_specs: Dict[str, Type[Backend]] = {}
-    if _FFMPEG_EXT is not None:
+    if lazy_import_ffmpeg_ext().is_available():
        backend_specs["ffmpeg"] = FFmpegBackend
    if _SOX_INITIALIZED:
        backend_specs["sox"] = SoXBackend

--- a/src/torchaudio/_extension/__init__.py
+++ b/src/torchaudio/_extension/__init__.py
@@ -4,11 +4,15 @@ import sys
 from torchaudio._internal.module_utils import eval_env, fail_with_message, is_module_available, no_op
-try:
+from .utils import (
-    from .fb import _init_ffmpeg
+    _check_cuda_version,
-except ImportError:
+    _fail_since_no_sox,
-    from .utils import _init_ffmpeg
+    _init_dll_path,
-from .utils import _check_cuda_version, _fail_since_no_ffmpeg, _fail_since_no_sox, _init_dll_path, _init_sox, _load_lib
+    _init_ffmpeg,
+    _init_sox,
+    _LazyImporter,
+    _load_lib,
+)
 _LG = logging.getLogger(__name__)
@@ -19,12 +23,11 @@ _LG = logging.getLogger(__name__)
 # https://github.com/pytorch/builder/blob/e2e4542b8eb0bdf491214451a1a4128bd606cce2/test/smoke_test/smoke_test.py#L80
 __all__ = [
    "fail_if_no_sox",
-    "fail_if_no_ffmpeg",
    "_check_cuda_version",
    "_IS_TORCHAUDIO_EXT_AVAILABLE",
    "_IS_RIR_AVAILABLE",
    "_SOX_INITIALIZED",
-    "_FFMPEG_EXT",
+    "lazy_import_ffmpeg_ext",
 ]
@@ -81,25 +84,16 @@ else:
    fail_if_no_sox = no_op if _SOX_INITIALIZED else _fail_since_no_sox
-# Initialize FFmpeg-related features
 _FFMPEG_EXT = None
-_USE_FFMPEG = eval_env("TORCHAUDIO_USE_FFMPEG", True)
-if _USE_FFMPEG and _IS_TORCHAUDIO_EXT_AVAILABLE:
-    try:
-        _FFMPEG_EXT = _init_ffmpeg()
-    except Exception:
-        # The initialization of FFmpeg extension will fail if supported FFmpeg
-        # libraries are not found in the system.
-        # Since the rest of the torchaudio works without it, we do not report the
-        # error here.
-        # The error will be raised when user code attempts to use these features.
-        _LG.debug("Failed to initialize ffmpeg bindings", exc_info=True)
-if _USE_FFMPEG:
+def lazy_import_ffmpeg_ext():
-    fail_if_no_ffmpeg = _fail_since_no_ffmpeg if _FFMPEG_EXT is None else no_op
+    """Load FFmpeg integration based on availability in lazy manner"""
-else:
-    fail_if_no_ffmpeg = fail_with_message("requires ffmpeg extension, but it is disabled. (TORCHAUDIO_USE_FFMPEG=0)")
+    global _FFMPEG_EXT
+    if _FFMPEG_EXT is None:
+        _FFMPEG_EXT = _LazyImporter("_torchaudio_ffmpeg", _init_ffmpeg)
+    return _FFMPEG_EXT
 fail_if_no_rir = (

--- a/src/torchaudio/_extension/utils.py
+++ b/src/torchaudio/_extension/utils.py
@@ -5,17 +5,14 @@ They should not depend on external state.
 Anything that depends on external state should happen in __init__.py
 """
 import importlib
 import logging
 import os
-import platform
+import types
-import warnings
 from functools import wraps
 from pathlib import Path
 import torch
-import torchaudio
 _LG = logging.getLogger(__name__)
 _LIB_DIR = Path(__file__).parent.parent / "lib"
@@ -62,7 +59,6 @@ def _load_lib(lib: str) -> bool:
    if not path.exists():
        return False
    torch.ops.load_library(path)
-    torch.classes.load_library(path)
    return True
@@ -78,94 +74,98 @@ def _init_sox():
    atexit.register(torch.ops.torchaudio.sox_effects_shutdown_sox_effects)
-def _try_access_avutil(ffmpeg_ver):
+_FFMPEG_VERS = ["6", "5", "4", ""]
-    libname_template = {
-        "Linux": "libavutil.so.{ver}",
-        "Darwin": "libavutil.{ver}.dylib",
-        "Windows": "avutil-{ver}.dll",
-    }[platform.system()]
-    avutil_ver = {"6": 58, "5": 57, "4": 56}[ffmpeg_ver]
-    libavutil = libname_template.format(ver=avutil_ver)
-    torchaudio.lib._torchaudio.find_avutil(libavutil)
-def _find_versionsed_ffmpeg_extension(ffmpeg_ver: str):
-    _LG.debug("Attempting to load FFmpeg version %s.", ffmpeg_ver)
-    library = f"libtorchaudio_ffmpeg{ffmpeg_ver}"
-    extension = f"_torchaudio_ffmpeg{ffmpeg_ver}"
-    if not _get_lib_path(extension).exists():
-        raise RuntimeError(f"FFmpeg {ffmpeg_ver} extension is not available.")
-    if ffmpeg_ver:
-        # A simple check for FFmpeg availability.
-        # This is not technically sufficient as other libraries could be missing,
-        # but usually this is sufficient.
-        #
-        # Note: the reason why this check is performed is because I don't know
-        # if the next `_load_lib` (which calls `ctypes.CDLL` under the hood),
-        # could leak handle to shared libraries of dependencies, in case it fails.
-        #
-        # i.e. If the `ctypes.CDLL("foo")` fails because one of `foo`'s dependency
-        # does not exist while `foo` and some other dependencies exist, is it guaranteed
-        # that none-of them are kept in memory after the failure??
-        _try_access_avutil(ffmpeg_ver)
-    _load_lib(library)
+def _find_versionsed_ffmpeg_extension(version: str):
+    _LG.debug("Attempting to load FFmpeg%s", version)
-    _LG.debug("Found FFmpeg version %s.", ffmpeg_ver)
+    ext = f"torchaudio.lib._torchaudio_ffmpeg{version}"
-    return importlib.import_module(f"torchaudio.lib.{extension}")
+    lib = f"libtorchaudio_ffmpeg{version}"
+    if not importlib.util.find_spec(ext):
+        raise RuntimeError(f"FFmpeg{version} extension is not available.")
-_FFMPEG_VERS = ["6", "5", "4", ""]
+    _load_lib(lib)
+    return importlib.import_module(ext)
-def _find_ffmpeg_extension(ffmpeg_vers, show_error):
+def _find_ffmpeg_extension(ffmpeg_vers):
-    logger = _LG.error if show_error else _LG.debug
    for ffmpeg_ver in ffmpeg_vers:
        try:
            return _find_versionsed_ffmpeg_extension(ffmpeg_ver)
        except Exception:
-            logger("Failed to load FFmpeg %s extension.", ffmpeg_ver, exc_info=True)
+            _LG.debug("Failed to load FFmpeg%s extension.", ffmpeg_ver, exc_info=True)
            continue
-    raise ImportError(f"Failed to intialize FFmpeg extension. Tried versions: {ffmpeg_vers}")
+    raise ImportError(
+        f"Failed to intialize FFmpeg extension. Tried versions: {ffmpeg_vers}. "
+        "Enable DEBUG logging to see more details about the error."
-def _find_available_ffmpeg_ext():
-    ffmpeg_vers = ["6", "5", "4", ""]
-    return [v for v in ffmpeg_vers if _get_lib_path(f"_torchaudio_ffmpeg{v}").exists()]
-def _init_ffmpeg(show_error=False):
-    ffmpeg_vers = _find_available_ffmpeg_ext()
-    if not ffmpeg_vers:
-        raise RuntimeError(
-            # fmt: off
-            "TorchAudio is not built with FFmpeg integration. "
-            "Please build torchaudio with USE_FFMPEG=1."
-            # fmt: on
    )
+def _get_ffmpeg_versions():
+    ffmpeg_vers = _FFMPEG_VERS
    # User override
-    if ffmpeg_ver := os.environ.get("TORCHAUDIO_USE_FFMPEG_VERSION"):
+    if (ffmpeg_ver := os.environ.get("TORCHAUDIO_USE_FFMPEG_VERSION")) is not None:
-        if ffmpeg_vers == [""]:
-            warnings.warn("TorchAudio is built in single FFmpeg mode. TORCHAUDIO_USE_FFMPEG_VERSION is ignored.")
-        else:
        if ffmpeg_ver not in ffmpeg_vers:
            raise ValueError(
-                    f"The FFmpeg version {ffmpeg_ver} (read from TORCHAUDIO_USE_FFMPEG_VERSION) "
+                f"The FFmpeg version '{ffmpeg_ver}' (read from TORCHAUDIO_USE_FFMPEG_VERSION) "
-                    f"is not available. Available versions are {[v for v in ffmpeg_vers if v]}"
+                f"is not one of supported values. Possible values are {ffmpeg_vers}"
            )
        ffmpeg_vers = [ffmpeg_ver]
+    return ffmpeg_vers
-    ext = _find_ffmpeg_extension(ffmpeg_vers, show_error)
+def _init_ffmpeg():
+    ffmpeg_vers = _get_ffmpeg_versions()
+    ext = _find_ffmpeg_extension(ffmpeg_vers)
    ext.init()
    if ext.get_log_level() > 8:
        ext.set_log_level(8)
    return ext
+class _LazyImporter(types.ModuleType):
+    """Module stand-in that defers the real import until first use.
+
+    Args:
+        name: Name given to this placeholder module object.
+        import_func: Zero-argument callable that performs the import and
+            returns the loaded module.
+    """
+    def __init__(self, name, import_func):
+        super().__init__(name)
+        self.import_func = import_func
+        self.module = None
+    # Note:
+    # `__getattr__` is only invoked when the normal attribute lookup fails.
+    # `_import_once` copies the loaded module's attributes into `self.__dict__`,
+    # so this method will not be called again for the same item.
+    def __getattr__(self, item):
+        self._import_once()
+        return getattr(self.module, item)
+    def __repr__(self):
+        if self.module is None:
+            # `types.ModuleType.__init__` stores the name as `__name__`; no `name`
+            # attribute is ever set. Reading `self.name` here would fall through to
+            # `__getattr__` and trigger the import just to print the object.
+            return f"<module '{self.__module__}.{self.__class__.__name__}(\"{self.__name__}\")'>"
+        return repr(self.module)
+    def __dir__(self):
+        self._import_once()
+        return dir(self.module)
+    def _import_once(self):
+        """Run ``import_func`` on the first call and cache the result in ``self.module``."""
+        if self.module is None:
+            self.module = self.import_func()
+            # Note:
+            # By attaching the module attributes to self,
+            # module attributes are directly accessible.
+            # This allows to avoid calling __getattr__ for every attribute access.
+            self.__dict__.update(self.module.__dict__)
+    def is_available(self):
+        """Return True if the import succeeds, False if it raises any ``Exception``."""
+        try:
+            self._import_once()
+        except Exception:
+            return False
+        return True
 def _init_dll_path():
    # On Windows Python-3.8+ has `os.add_dll_directory` call,
    # which is called to configure dll search path.
@@ -182,6 +182,8 @@ def _init_dll_path():
 def _check_cuda_version():
+    import torchaudio.lib._torchaudio
    version = torchaudio.lib._torchaudio.cuda_version()
    if version is not None and torch.version.cuda is not None:
        version_str = str(version)
@@ -214,22 +216,3 @@ def _fail_since_no_sox(func):
        return func(*_args, **_kwargs)
    return wrapped
-def _fail_since_no_ffmpeg(func):
-    @wraps(func)
-    def wrapped(*_args, **_kwargs):
-        try:
-            # Note:
-            # We run _init_ffmpeg again just to show users the stacktrace.
-            # _init_ffmpeg would not succeed here.
-            _init_ffmpeg(show_error=True)
-        except Exception as err:
-            raise RuntimeError(
-                f"{func.__name__} requires FFmpeg extension which is not available. "
-                "Please refer to the stacktrace above for how to resolve this."
-            ) from err
-        # This should not happen in normal execution, but just in case.
-        return func(*_args, **_kwargs)
-    return wrapped
--- a/src/torchaudio/io/_playback.py
+++ b/src/torchaudio/io/_playback.py
@@ -15,7 +15,6 @@ dict_format = {
 }
-@torchaudio._extension.fail_if_no_ffmpeg
 def play_audio(
    waveform: torch.Tensor,
    sample_rate: Optional[float],
@@ -57,7 +56,9 @@ def play_audio(
    time, num_channels = waveform.size()
    if num_channels > 2:
        warnings.warn(
-            f"Expected up to 2 channels, got {num_channels} channels instead. Only the first 2 channels will be played."
+            f"Expected up to 2 channels, got {num_channels} channels instead. "
+            "Only the first 2 channels will be played.",
+            stacklevel=2,
        )
    # Write to speaker device

--- a/src/torchaudio/io/_stream_reader.py
+++ b/src/torchaudio/io/_stream_reader.py
@@ -9,11 +9,7 @@ import torch
 import torchaudio
 from torch.utils._pytree import tree_map
-if torchaudio._extension._FFMPEG_EXT is not None:
+ffmpeg_ext = torchaudio._extension.lazy_import_ffmpeg_ext()
-    _StreamReader = torchaudio._extension._FFMPEG_EXT.StreamReader
-    _StreamReaderBytes = torchaudio._extension._FFMPEG_EXT.StreamReaderBytes
-    _StreamReaderFileObj = torchaudio._extension._FFMPEG_EXT.StreamReaderFileObj
 __all__ = [
    "StreamReader",
@@ -442,7 +438,6 @@ InputStreamTypes = TypeVar("InputStream", bound=SourceStream)
 OutputStreamTypes = TypeVar("OutputStream", bound=OutputStream)
-@torchaudio._extension.fail_if_no_ffmpeg
 class StreamReader:
    """Fetch and decode audio/video streams chunk by chunk.
@@ -524,11 +519,11 @@ class StreamReader:
    ):
        self.src = src
        if isinstance(src, bytes):
-            self._be = _StreamReaderBytes(src, format, option, buffer_size)
+            self._be = ffmpeg_ext.StreamReaderBytes(src, format, option, buffer_size)
        elif hasattr(src, "read"):
-            self._be = _StreamReaderFileObj(src, format, option, buffer_size)
+            self._be = ffmpeg_ext.StreamReaderFileObj(src, format, option, buffer_size)
        else:
-            self._be = _StreamReader(os.path.normpath(src), format, option)
+            self._be = ffmpeg_ext.StreamReader(os.path.normpath(src), format, option)
        i = self._be.find_best_audio_stream()
        self._default_audio_stream = None if i < 0 else i

--- a/src/torchaudio/io/_stream_writer.py
+++ b/src/torchaudio/io/_stream_writer.py
@@ -5,17 +5,11 @@ from typing import BinaryIO, Dict, Optional, Union
 import torch
 import torchaudio
+ffmpeg_ext = torchaudio._extension.lazy_import_ffmpeg_ext()
-if torchaudio._extension._FFMPEG_EXT is None:
-    ConfigBase = object
-else:
-    ConfigBase = torchaudio._extension._FFMPEG_EXT.CodecConfig
-    _StreamWriter = torchaudio._extension._FFMPEG_EXT.StreamWriter
-    _StreamWriterFileObj = torchaudio._extension._FFMPEG_EXT.StreamWriterFileObj
 @dataclass
-class CodecConfig(ConfigBase):
+class CodecConfig:
    """Codec configuration."""
    bit_rate: int = -1
@@ -37,8 +31,19 @@ class CodecConfig(ConfigBase):
    max_b_frames: int = -1
    """maximum number of B-frames between non-B-frames."""
-    def __post_init__(self):
-        super().__init__(self.bit_rate, self.compression_level, self.qscale, self.gop_size, self.max_b_frames)
+def _convert_config(cfg: CodecConfig):
+    """Convert the Python-side ``CodecConfig`` into ``ffmpeg_ext.CodecConfig``.
+
+    Returns ``None`` unchanged when ``cfg`` is ``None``; otherwise builds
+    ``ffmpeg_ext.CodecConfig`` from ``bit_rate``, ``compression_level``,
+    ``qscale``, ``gop_size`` and ``max_b_frames``, passed positionally in
+    that order.
+    """
+    if cfg is None:
+        return None
+    # Convert the codecconfig to C++ compatible type.
+    # omitting the return type annotation so as not to access ffmpeg_ext here.
+    return ffmpeg_ext.CodecConfig(
+        cfg.bit_rate,
+        cfg.compression_level,
+        cfg.qscale,
+        cfg.gop_size,
+        cfg.max_b_frames,
+    )
 def _format_doc(**kwargs):
@@ -128,7 +133,6 @@ _format_common_args = _format_doc(
 )
-@torchaudio._extension.fail_if_no_ffmpeg
 class StreamWriter:
    """Encode and write audio/video streams chunk by chunk
@@ -190,9 +194,9 @@ class StreamWriter:
        buffer_size: int = 4096,
    ):
        if hasattr(dst, "write"):
-            self._s = _StreamWriterFileObj(dst, format, buffer_size)
+            self._s = ffmpeg_ext.StreamWriterFileObj(dst, format, buffer_size)
        else:
-            self._s = _StreamWriter(str(dst), format)
+            self._s = ffmpeg_ext.StreamWriter(str(dst), format)
        self._is_open = False
    @_format_common_args
@@ -280,7 +284,7 @@ class StreamWriter:
            encoder_format,
            encoder_sample_rate,
            encoder_num_channels,
-            codec_config,
+            _convert_config(codec_config),
            filter_desc,
        )
@@ -376,7 +380,7 @@ class StreamWriter:
            encoder_width,
            encoder_height,
            hw_accel,
-            codec_config,
+            _convert_config(codec_config),
            filter_desc,
        )

--- a/src/torchaudio/utils/ffmpeg_utils.py
+++ b/src/torchaudio/utils/ffmpeg_utils.py
@@ -6,8 +6,9 @@ from typing import Dict, List, Tuple
 import torchaudio
+ffmpeg_ext = torchaudio._extension.lazy_import_ffmpeg_ext()
-@torchaudio._extension.fail_if_no_ffmpeg
 def get_versions() -> Dict[str, Tuple[int]]:
    """Get the versions of FFmpeg libraries
@@ -15,19 +16,17 @@ def get_versions() -> Dict[str, Tuple[int]]:
        dict: mapping from library names to version string,
            i.e. `"libavutil": (56, 22, 100)`.
    """
-    return torchaudio._extension._FFMPEG_EXT.get_versions()
+    return ffmpeg_ext.get_versions()
-@torchaudio._extension.fail_if_no_ffmpeg
 def get_log_level() -> int:
    """Get the log level of FFmpeg.
    See :py:func:`set_log_level` for the detailo.
    """
-    return torchaudio._extension._FFMPEG_EXT.get_log_level()
+    return ffmpeg_ext.get_log_level()
-@torchaudio._extension.fail_if_no_ffmpeg
 def set_log_level(level: int):
    """Set the log level of FFmpeg (libavformat etc)
@@ -61,10 +60,9 @@ def set_log_level(level: int):
                  Extremely verbose debugging, useful for libav* development.
    """
-    torchaudio._extension._FFMPEG_EXT.set_log_level(level)
+    ffmpeg_ext.set_log_level(level)
-@torchaudio._extension.fail_if_no_ffmpeg
 def get_demuxers() -> Dict[str, str]:
    """Get the available demuxers.
@@ -79,10 +77,9 @@ def get_demuxers() -> Dict[str, str]:
        ... aax: CRI AAX
        ... ac3: raw AC-3
    """
-    return torchaudio._extension._FFMPEG_EXT.get_demuxers()
+    return ffmpeg_ext.get_demuxers()
-@torchaudio._extension.fail_if_no_ffmpeg
 def get_muxers() -> Dict[str, str]:
    """Get the available muxers.
@@ -98,10 +95,9 @@ def get_muxers() -> Dict[str, str]:
        ... adx: CRI ADX
        ... aiff: Audio IFF
    """
-    return torchaudio._extension._FFMPEG_EXT.get_muxers()
+    return ffmpeg_ext.get_muxers()
-@torchaudio._extension.fail_if_no_ffmpeg
 def get_audio_decoders() -> Dict[str, str]:
    """Get the available audio decoders.
@@ -117,10 +113,9 @@ def get_audio_decoders() -> Dict[str, str]:
        ... adx: CRI ADX
        ... aiff: Audio IFF
    """
-    return torchaudio._extension._FFMPEG_EXT.get_audio_decoders()
+    return ffmpeg_ext.get_audio_decoders()
-@torchaudio._extension.fail_if_no_ffmpeg
 def get_audio_encoders() -> Dict[str, str]:
    """Get the available audio encoders.
@@ -137,10 +132,9 @@ def get_audio_encoders() -> Dict[str, str]:
        ... ac3_fixed: ATSC A/52A (AC-3)
        ... alac: ALAC (Apple Lossless Audio Codec)
    """
-    return torchaudio._extension._FFMPEG_EXT.get_audio_encoders()
+    return ffmpeg_ext.get_audio_encoders()
-@torchaudio._extension.fail_if_no_ffmpeg
 def get_video_decoders() -> Dict[str, str]:
    """Get the available video decoders.
@@ -157,10 +151,9 @@ def get_video_decoders() -> Dict[str, str]:
        ... amv: AMV Video
        ... anm: Deluxe Paint Animation
    """
-    return torchaudio._extension._FFMPEG_EXT.get_video_decoders()
+    return ffmpeg_ext.get_video_decoders()
-@torchaudio._extension.fail_if_no_ffmpeg
 def get_video_encoders() -> Dict[str, str]:
    """Get the available video encoders.
@@ -178,10 +171,9 @@ def get_video_encoders() -> Dict[str, str]:
        ... asv1: ASUS V1
        ... asv2: ASUS V2
    """
-    return torchaudio._extension._FFMPEG_EXT.get_video_encoders()
+    return ffmpeg_ext.get_video_encoders()
-@torchaudio._extension.fail_if_no_ffmpeg
 def get_input_devices() -> Dict[str, str]:
    """Get the available input devices.
@@ -194,10 +186,9 @@ def get_input_devices() -> Dict[str, str]:
        ... avfoundation: AVFoundation input device
        ... lavfi: Libavfilter virtual input device
    """
-    return torchaudio._extension._FFMPEG_EXT.get_input_devices()
+    return ffmpeg_ext.get_input_devices()
-@torchaudio._extension.fail_if_no_ffmpeg
 def get_output_devices() -> Dict[str, str]:
    """Get the available output devices.
@@ -209,10 +200,9 @@ def get_output_devices() -> Dict[str, str]:
        >>>     print(f"{k}: {v}")
        ... audiotoolbox: AudioToolbox output device
    """
-    return torchaudio._extension._FFMPEG_EXT.get_output_devices()
+    return ffmpeg_ext.get_output_devices()
-@torchaudio._extension.fail_if_no_ffmpeg
 def get_input_protocols() -> List[str]:
    """Get the supported input protocols.
@@ -223,10 +213,9 @@ def get_input_protocols() -> List[str]:
        >>> print(get_input_protocols())
        ... ['file', 'ftp', 'hls', 'http','https', 'pipe', 'rtmp', 'tcp', 'tls', 'udp', 'unix']
    """
-    return torchaudio._extension._FFMPEG_EXT.get_input_protocols()
+    return ffmpeg_ext.get_input_protocols()
-@torchaudio._extension.fail_if_no_ffmpeg
 def get_output_protocols() -> List[str]:
    """Get the supported output protocols.
@@ -237,10 +226,9 @@ def get_output_protocols() -> List[str]:
        >>> print(get_output_protocols())
        ... ['file', 'ftp', 'http', 'https', 'md5', 'pipe', 'prompeg', 'rtmp', 'tee', 'tcp', 'tls', 'udp', 'unix']
    """
-    return torchaudio._extension._FFMPEG_EXT.get_output_protocols()
+    return ffmpeg_ext.get_output_protocols()
-@torchaudio._extension.fail_if_no_ffmpeg
 def get_build_config() -> str:
    """Get the FFmpeg build configuration
@@ -251,10 +239,9 @@ def get_build_config() -> str:
        >>> print(get_build_config())
        --prefix=/Users/runner/miniforge3 --cc=arm64-apple-darwin20.0.0-clang --enable-gpl --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-neon --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-libvpx --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1646229390493/_build_env/bin/pkg-config --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1646229390493/_build_env/bin/x86_64-apple-darwin13.4.0-clang  # noqa
    """
-    return torchaudio._extension._FFMPEG_EXT.get_build_config()
+    return ffmpeg_ext.get_build_config()
-@torchaudio._extension.fail_if_no_ffmpeg
 def clear_cuda_context_cache():
    """Clear the CUDA context used by CUDA Hardware accelerated video decoding"""
-    torchaudio._extension._FFMPEG_EXT.clear_cuda_context_cache()
+    ffmpeg_ext.clear_cuda_context_cache()
--- a/test/torchaudio_unittest/common_utils/__init__.py
+++ b/test/torchaudio_unittest/common_utils/__init__.py
@@ -3,7 +3,6 @@ from .backend_utils import set_audio_backend
 from .case_utils import (
    disabledInCI,
    HttpServerMixin,
-    is_ffmpeg_available,
    PytorchTestCase,
    skipIfCudaSmallMemory,
    skipIfNoAudioDevice,
@@ -44,7 +43,6 @@ __all__ = [
    "TestBaseMixin",
    "PytorchTestCase",
    "TorchaudioTestCase",
-    "is_ffmpeg_available",
    "skipIfNoAudioDevice",
    "skipIfNoCtcDecoder",
    "skipIfNoCuCtcDecoder",

--- a/test/torchaudio_unittest/common_utils/case_utils.py
+++ b/test/torchaudio_unittest/common_utils/case_utils.py
@@ -111,10 +111,7 @@ class TorchaudioTestCase(TestBaseMixin, PytorchTestCase):
    pass
-def is_ffmpeg_available():
+_IS_FFMPEG_AVAILABLE = torchaudio._extension.lazy_import_ffmpeg_ext().is_available()
-    return torchaudio._extension._FFMPEG_EXT is not None
 _IS_CTC_DECODER_AVAILABLE = None
 _IS_CUDA_CTC_DECODER_AVAILABLE = None
@@ -260,7 +257,7 @@ skipIfNoQengine = _skipIf(
    key="NO_QUANTIZATION",
 )
 skipIfNoFFmpeg = _skipIf(
-    not is_ffmpeg_available(),
+    not _IS_FFMPEG_AVAILABLE,
    reason="ffmpeg features are not available.",
    key="NO_FFMPEG",
 )
@@ -273,7 +270,7 @@ skipIfPy310 = _skipIf(
    key="ON_PYTHON_310",
 )
 skipIfNoAudioDevice = _skipIf(
-    not torchaudio.utils.ffmpeg_utils.get_output_devices(),
+    not (_IS_FFMPEG_AVAILABLE and torchaudio.utils.ffmpeg_utils.get_output_devices()),
    reason="No output audio device is available.",
    key="NO_AUDIO_OUT_DEVICE",
 )
@@ -291,7 +288,7 @@ disabledInCI = _skipIf(
 def skipIfNoHWAccel(name):
    key = "NO_HW_ACCEL"
-    if not is_ffmpeg_available():
+    if not _IS_FFMPEG_AVAILABLE:
        return _skipIf(True, reason="ffmpeg features are not available.", key=key)
    if not torch.cuda.is_available():
        return _skipIf(True, reason="CUDA is not available.", key=key)

--- a/test/torchaudio_unittest/io/stream_reader_test.py
+++ b/test/torchaudio_unittest/io/stream_reader_test.py
@@ -3,13 +3,22 @@ import io
 import torch
 import torchaudio
 from parameterized import parameterized, parameterized_class
+from torchaudio.io import StreamReader, StreamWriter
+from torchaudio.io._stream_reader import (
+    ChunkTensor,
+    OutputAudioStream,
+    OutputVideoStream,
+    SourceAudioStream,
+    SourceStream,
+    SourceVideoStream,
+)
 from torchaudio_unittest.common_utils import (
    disabledInCI,
    get_asset_path,
    get_image,
    get_sinusoid,
    get_wav_data,
-    is_ffmpeg_available,
    nested_params,
    rgb_to_gray,
    rgb_to_yuv_ccir,
@@ -22,18 +31,6 @@ from torchaudio_unittest.common_utils import (
 )
-if is_ffmpeg_available():
-    from torchaudio.io import StreamReader, StreamWriter
-    from torchaudio.io._stream_reader import (
-        ChunkTensor,
-        OutputAudioStream,
-        OutputVideoStream,
-        SourceAudioStream,
-        SourceStream,
-        SourceVideoStream,
-    )
 @skipIfNoFFmpeg
 class ChunkTensorTest(TorchaudioTestCase):
    def test_chunktensor(self):

--- a/test/torchaudio_unittest/io/stream_writer_test.py
+++ b/test/torchaudio_unittest/io/stream_writer_test.py
@@ -5,10 +5,11 @@ import torch
 import torchaudio
 from parameterized import parameterized, parameterized_class
+from torchaudio.io import CodecConfig, StreamReader, StreamWriter
 from torchaudio_unittest.common_utils import (
    get_asset_path,
    get_sinusoid,
-    is_ffmpeg_available,
    nested_params,
    rgb_to_yuv_ccir,
    skipIfNoFFmpeg,
@@ -19,9 +20,6 @@ from torchaudio_unittest.common_utils import (
 from .common import lt42
-if is_ffmpeg_available():
-    from torchaudio.io import CodecConfig, StreamReader, StreamWriter
 def get_audio_chunk(fmt, sample_rate, num_channels):
    path = get_asset_path("nasa_13013.mp4")