Support multiple FFmpeg versions (#3464)

Summary: This commit introduces support for multiple FFmpeg versions for OSS binary distributions. Currently, torchaudio only works with FFmpeg 4, which is inconvenient at every stage from installation to runtime linking. This commit allows picking FFmpeg 4, 5 or 6 at runtime, instead of only looking for v4. The way it works is that we compile the FFmpeg extension three times, each against a different FFmpeg version, and ship all of them. At runtime, we look for libavutil of each specific version and, when one is found, load the corresponding FFmpeg extension. The order of preference is 6, 5, then 4. To make the build process simple and reproducible, we use pre-built binaries of FFmpeg during the build. They are LGPL and are downloaded from S3 at build time, instead of building FFmpeg every time. The use of pre-built binaries as scaffolding limits the systems that can build torchaudio, so this commit also introduces a single-FFmpeg-version support mode: setting FFMPEG_ROOT during the build changes the way binaries are built so that they support only one specific version of FFmpeg. Pull Request resolved: https://github.com/pytorch/audio/pull/3464 Differential Revision: D47300223 Pulled By: mthrok fbshipit-source-id: 560c7968315e4c8922afa11a4693f648c0356d04

Support multiple FFmpeg versions (#3464)
Summary: This commit introduces support for multiple FFmpeg versions for OSS binary distributions. Currently, torchaudio only works with FFmpeg 4, which is inconvenient at every stage from installation to runtime linking. This commit allows picking FFmpeg 4, 5 or 6 at runtime, instead of only looking for v4. The way it works is that we compile the FFmpeg extension three times, each against a different FFmpeg version, and ship all of them. At runtime, we look for libavutil of each specific version and, when one is found, load the corresponding FFmpeg extension. The order of preference is 6, 5, then 4. To make the build process simple and reproducible, we use pre-built binaries of FFmpeg during the build. They are LGPL and are downloaded from S3 at build time, instead of building FFmpeg every time. The use of pre-built binaries as scaffolding limits the systems that can build torchaudio, so this commit also introduces a single-FFmpeg-version support mode: setting FFMPEG_ROOT during the build changes the way binaries are built so that they support only one specific version of FFmpeg. Pull Request resolved: https://github.com/pytorch/audio/pull/3464 Differential Revision: D47300223 Pulled By: mthrok fbshipit-source-id: 560c7968315e4c8922afa11a4693f648c0356d04
786066b4 · moto · Facebook GitHub Bot · cc41178b · 786066b4 · 786066b4
Commit 786066b4 authored Jul 11, 2023 by moto Committed by Facebook GitHub Bot Jul 11, 2023
Hide whitespace changes
Inline Side-by-side

Showing with 22 additions and 20 deletions

torchaudio/io/_stream_writer.py torchaudio/io/_stream_writer.py +7 -5

torchaudio/utils/ffmpeg_utils.py torchaudio/utils/ffmpeg_utils.py +15 -15

No files found.
--- a/torchaudio/io/_stream_writer.py
+++ b/torchaudio/io/_stream_writer.py
@@ -5,10 +5,12 @@ import torch
 import torchaudio
-if torchaudio._extension._FFMPEG_INITIALIZED:
+if torchaudio._extension._FFMPEG_EXT is None:
-    ConfigBase = torchaudio.lib._torchaudio_ffmpeg.CodecConfig
-else:
    ConfigBase = object
+else:
+    ConfigBase = torchaudio._extension._FFMPEG_EXT.CodecConfig
+    _StreamWriter = torchaudio._extension._FFMPEG_EXT.StreamWriter
+    _StreamWriterFileObj = torchaudio._extension._FFMPEG_EXT.StreamWriterFileObj
 @dataclass
@@ -187,9 +189,9 @@ class StreamWriter:
        buffer_size: int = 4096,
    ):
        if isinstance(dst, str):
-            self._s = torchaudio.lib._torchaudio_ffmpeg.StreamWriter(dst, format)
+            self._s = _StreamWriter(dst, format)
        elif hasattr(dst, "write"):
-            self._s = torchaudio.lib._torchaudio_ffmpeg.StreamWriterFileObj(dst, format, buffer_size)
+            self._s = _StreamWriterFileObj(dst, format, buffer_size)
        else:
            raise ValueError("`dst` must be either a string or a file-like object.")
        self._is_open = False

--- a/torchaudio/utils/ffmpeg_utils.py
+++ b/torchaudio/utils/ffmpeg_utils.py
@@ -15,7 +15,7 @@ def get_versions() -> Dict[str, Tuple[int]]:
        dict: mapping from library names to version tuples,
            e.g. `"libavutil": (56, 22, 100)`.
    """
-    return torchaudio.lib._torchaudio_ffmpeg.get_versions()
+    return torchaudio._extension._FFMPEG_EXT.get_versions()
 @torchaudio._extension.fail_if_no_ffmpeg
@@ -24,7 +24,7 @@ def get_log_level() -> int:
    See :py:func:`set_log_level` for the details.
    """
-    return torchaudio.lib._torchaudio_ffmpeg.get_log_level()
+    return torchaudio._extension._FFMPEG_EXT.get_log_level()
 @torchaudio._extension.fail_if_no_ffmpeg
@@ -61,7 +61,7 @@ def set_log_level(level: int):
                  Extremely verbose debugging, useful for libav* development.
    """
-    torchaudio.lib._torchaudio_ffmpeg.set_log_level(level)
+    torchaudio._extension._FFMPEG_EXT.set_log_level(level)
 @torchaudio._extension.fail_if_no_ffmpeg
@@ -79,7 +79,7 @@ def get_demuxers() -> Dict[str, str]:
        ... aax: CRI AAX
        ... ac3: raw AC-3
    """
-    return torchaudio.lib._torchaudio_ffmpeg.get_demuxers()
+    return torchaudio._extension._FFMPEG_EXT.get_demuxers()
 @torchaudio._extension.fail_if_no_ffmpeg
@@ -98,7 +98,7 @@ def get_muxers() -> Dict[str, str]:
        ... adx: CRI ADX
        ... aiff: Audio IFF
    """
-    return torchaudio.lib._torchaudio_ffmpeg.get_muxers()
+    return torchaudio._extension._FFMPEG_EXT.get_muxers()
 @torchaudio._extension.fail_if_no_ffmpeg
@@ -117,7 +117,7 @@ def get_audio_decoders() -> Dict[str, str]:
        ... adx: CRI ADX
        ... aiff: Audio IFF
    """
-    return torchaudio.lib._torchaudio_ffmpeg.get_audio_decoders()
+    return torchaudio._extension._FFMPEG_EXT.get_audio_decoders()
 @torchaudio._extension.fail_if_no_ffmpeg
@@ -137,7 +137,7 @@ def get_audio_encoders() -> Dict[str, str]:
        ... ac3_fixed: ATSC A/52A (AC-3)
        ... alac: ALAC (Apple Lossless Audio Codec)
    """
-    return torchaudio.lib._torchaudio_ffmpeg.get_audio_encoders()
+    return torchaudio._extension._FFMPEG_EXT.get_audio_encoders()
 @torchaudio._extension.fail_if_no_ffmpeg
@@ -157,7 +157,7 @@ def get_video_decoders() -> Dict[str, str]:
        ... amv: AMV Video
        ... anm: Deluxe Paint Animation
    """
-    return torchaudio.lib._torchaudio_ffmpeg.get_video_decoders()
+    return torchaudio._extension._FFMPEG_EXT.get_video_decoders()
 @torchaudio._extension.fail_if_no_ffmpeg
@@ -178,7 +178,7 @@ def get_video_encoders() -> Dict[str, str]:
        ... asv1: ASUS V1
        ... asv2: ASUS V2
    """
-    return torchaudio.lib._torchaudio_ffmpeg.get_video_encoders()
+    return torchaudio._extension._FFMPEG_EXT.get_video_encoders()
 @torchaudio._extension.fail_if_no_ffmpeg
@@ -194,7 +194,7 @@ def get_input_devices() -> Dict[str, str]:
        ... avfoundation: AVFoundation input device
        ... lavfi: Libavfilter virtual input device
    """
-    return torchaudio.lib._torchaudio_ffmpeg.get_input_devices()
+    return torchaudio._extension._FFMPEG_EXT.get_input_devices()
 @torchaudio._extension.fail_if_no_ffmpeg
@@ -209,7 +209,7 @@ def get_output_devices() -> Dict[str, str]:
        >>>     print(f"{k}: {v}")
        ... audiotoolbox: AudioToolbox output device
    """
-    return torchaudio.lib._torchaudio_ffmpeg.get_output_devices()
+    return torchaudio._extension._FFMPEG_EXT.get_output_devices()
 @torchaudio._extension.fail_if_no_ffmpeg
@@ -223,7 +223,7 @@ def get_input_protocols() -> List[str]:
        >>> print(get_input_protocols())
        ... ['file', 'ftp', 'hls', 'http','https', 'pipe', 'rtmp', 'tcp', 'tls', 'udp', 'unix']
    """
-    return torchaudio.lib._torchaudio_ffmpeg.get_input_protocols()
+    return torchaudio._extension._FFMPEG_EXT.get_input_protocols()
 @torchaudio._extension.fail_if_no_ffmpeg
@@ -237,7 +237,7 @@ def get_output_protocols() -> List[str]:
        >>> print(get_output_protocols())
        ... ['file', 'ftp', 'http', 'https', 'md5', 'pipe', 'prompeg', 'rtmp', 'tee', 'tcp', 'tls', 'udp', 'unix']
    """
-    return torchaudio.lib._torchaudio_ffmpeg.get_output_protocols()
+    return torchaudio._extension._FFMPEG_EXT.get_output_protocols()
 @torchaudio._extension.fail_if_no_ffmpeg
@@ -251,10 +251,10 @@ def get_build_config() -> str:
        >>> print(get_build_config())
        --prefix=/Users/runner/miniforge3 --cc=arm64-apple-darwin20.0.0-clang --enable-gpl --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-neon --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-libvpx --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1646229390493/_build_env/bin/pkg-config --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1646229390493/_build_env/bin/x86_64-apple-darwin13.4.0-clang  # noqa
    """
-    return torchaudio.lib._torchaudio_ffmpeg.get_build_config()
+    return torchaudio._extension._FFMPEG_EXT.get_build_config()
 @torchaudio._extension.fail_if_no_ffmpeg
 def clear_cuda_context_cache():
    """Clear the CUDA context used by CUDA Hardware accelerated video decoding"""
-    torchaudio.lib._torchaudio_ffmpeg.clear_cuda_context_cache()
+    torchaudio._extension._FFMPEG_EXT.clear_cuda_context_cache()