Commit 786066b4 authored by moto's avatar moto Committed by Facebook GitHub Bot
Browse files

Support multiple FFmpeg versions (#3464)

Summary:
This commit introduces support for multiple FFmpeg versions for OSS binary distributions.

Currently torchaudio only works with FFmpeg 4, which is inconvenient at every step from installation to runtime linking.
This commit allows torchaudio to pick FFmpeg 4, 5 or 6 at runtime, instead of only looking for v4.

The way it works is that we compile the FFmpeg extension three times, each time against a different FFmpeg version, and ship all of them.
At runtime, we look for a specific version of libavutil, and when one is found, we load the corresponding FFmpeg extension.
The order of preference is 6, 5, then 4.

To make the build process simple and reproducible, we use pre-built FFmpeg binaries during the build.
These binaries are LGPL builds and are downloaded from S3 at build time, instead of being rebuilt every time.

Using pre-built binaries as scaffolding limits the systems on which torchaudio can be built, so this commit also introduces
a single-FFmpeg-version support mode. Setting FFMPEG_ROOT during the build changes the way the binaries are built
so that they support only one specific version of FFmpeg.

Pull Request resolved: https://github.com/pytorch/audio/pull/3464

Differential Revision: D47300223

Pulled By: mthrok

fbshipit-source-id: 560c7968315e4c8922afa11a4693f648c0356d04
parent cc41178b
...@@ -5,10 +5,12 @@ import torch ...@@ -5,10 +5,12 @@ import torch
import torchaudio import torchaudio
if torchaudio._extension._FFMPEG_INITIALIZED: if torchaudio._extension._FFMPEG_EXT is None:
ConfigBase = torchaudio.lib._torchaudio_ffmpeg.CodecConfig
else:
ConfigBase = object ConfigBase = object
else:
ConfigBase = torchaudio._extension._FFMPEG_EXT.CodecConfig
_StreamWriter = torchaudio._extension._FFMPEG_EXT.StreamWriter
_StreamWriterFileObj = torchaudio._extension._FFMPEG_EXT.StreamWriterFileObj
@dataclass @dataclass
...@@ -187,9 +189,9 @@ class StreamWriter: ...@@ -187,9 +189,9 @@ class StreamWriter:
buffer_size: int = 4096, buffer_size: int = 4096,
): ):
if isinstance(dst, str): if isinstance(dst, str):
self._s = torchaudio.lib._torchaudio_ffmpeg.StreamWriter(dst, format) self._s = _StreamWriter(dst, format)
elif hasattr(dst, "write"): elif hasattr(dst, "write"):
self._s = torchaudio.lib._torchaudio_ffmpeg.StreamWriterFileObj(dst, format, buffer_size) self._s = _StreamWriterFileObj(dst, format, buffer_size)
else: else:
raise ValueError("`dst` must be either a string or a file-like object.") raise ValueError("`dst` must be either a string or a file-like object.")
self._is_open = False self._is_open = False
......
...@@ -15,7 +15,7 @@ def get_versions() -> Dict[str, Tuple[int]]: ...@@ -15,7 +15,7 @@ def get_versions() -> Dict[str, Tuple[int]]:
dict: mapping from library names to version string, dict: mapping from library names to version string,
i.e. `"libavutil": (56, 22, 100)`. i.e. `"libavutil": (56, 22, 100)`.
""" """
return torchaudio.lib._torchaudio_ffmpeg.get_versions() return torchaudio._extension._FFMPEG_EXT.get_versions()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -24,7 +24,7 @@ def get_log_level() -> int: ...@@ -24,7 +24,7 @@ def get_log_level() -> int:
See :py:func:`set_log_level` for the details. See :py:func:`set_log_level` for the details.
""" """
return torchaudio.lib._torchaudio_ffmpeg.get_log_level() return torchaudio._extension._FFMPEG_EXT.get_log_level()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -61,7 +61,7 @@ def set_log_level(level: int): ...@@ -61,7 +61,7 @@ def set_log_level(level: int):
Extremely verbose debugging, useful for libav* development. Extremely verbose debugging, useful for libav* development.
""" """
torchaudio.lib._torchaudio_ffmpeg.set_log_level(level) torchaudio._extension._FFMPEG_EXT.set_log_level(level)
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -79,7 +79,7 @@ def get_demuxers() -> Dict[str, str]: ...@@ -79,7 +79,7 @@ def get_demuxers() -> Dict[str, str]:
... aax: CRI AAX ... aax: CRI AAX
... ac3: raw AC-3 ... ac3: raw AC-3
""" """
return torchaudio.lib._torchaudio_ffmpeg.get_demuxers() return torchaudio._extension._FFMPEG_EXT.get_demuxers()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -98,7 +98,7 @@ def get_muxers() -> Dict[str, str]: ...@@ -98,7 +98,7 @@ def get_muxers() -> Dict[str, str]:
... adx: CRI ADX ... adx: CRI ADX
... aiff: Audio IFF ... aiff: Audio IFF
""" """
return torchaudio.lib._torchaudio_ffmpeg.get_muxers() return torchaudio._extension._FFMPEG_EXT.get_muxers()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -117,7 +117,7 @@ def get_audio_decoders() -> Dict[str, str]: ...@@ -117,7 +117,7 @@ def get_audio_decoders() -> Dict[str, str]:
... adx: CRI ADX ... adx: CRI ADX
... aiff: Audio IFF ... aiff: Audio IFF
""" """
return torchaudio.lib._torchaudio_ffmpeg.get_audio_decoders() return torchaudio._extension._FFMPEG_EXT.get_audio_decoders()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -137,7 +137,7 @@ def get_audio_encoders() -> Dict[str, str]: ...@@ -137,7 +137,7 @@ def get_audio_encoders() -> Dict[str, str]:
... ac3_fixed: ATSC A/52A (AC-3) ... ac3_fixed: ATSC A/52A (AC-3)
... alac: ALAC (Apple Lossless Audio Codec) ... alac: ALAC (Apple Lossless Audio Codec)
""" """
return torchaudio.lib._torchaudio_ffmpeg.get_audio_encoders() return torchaudio._extension._FFMPEG_EXT.get_audio_encoders()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -157,7 +157,7 @@ def get_video_decoders() -> Dict[str, str]: ...@@ -157,7 +157,7 @@ def get_video_decoders() -> Dict[str, str]:
... amv: AMV Video ... amv: AMV Video
... anm: Deluxe Paint Animation ... anm: Deluxe Paint Animation
""" """
return torchaudio.lib._torchaudio_ffmpeg.get_video_decoders() return torchaudio._extension._FFMPEG_EXT.get_video_decoders()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -178,7 +178,7 @@ def get_video_encoders() -> Dict[str, str]: ...@@ -178,7 +178,7 @@ def get_video_encoders() -> Dict[str, str]:
... asv1: ASUS V1 ... asv1: ASUS V1
... asv2: ASUS V2 ... asv2: ASUS V2
""" """
return torchaudio.lib._torchaudio_ffmpeg.get_video_encoders() return torchaudio._extension._FFMPEG_EXT.get_video_encoders()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -194,7 +194,7 @@ def get_input_devices() -> Dict[str, str]: ...@@ -194,7 +194,7 @@ def get_input_devices() -> Dict[str, str]:
... avfoundation: AVFoundation input device ... avfoundation: AVFoundation input device
... lavfi: Libavfilter virtual input device ... lavfi: Libavfilter virtual input device
""" """
return torchaudio.lib._torchaudio_ffmpeg.get_input_devices() return torchaudio._extension._FFMPEG_EXT.get_input_devices()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -209,7 +209,7 @@ def get_output_devices() -> Dict[str, str]: ...@@ -209,7 +209,7 @@ def get_output_devices() -> Dict[str, str]:
>>> print(f"{k}: {v}") >>> print(f"{k}: {v}")
... audiotoolbox: AudioToolbox output device ... audiotoolbox: AudioToolbox output device
""" """
return torchaudio.lib._torchaudio_ffmpeg.get_output_devices() return torchaudio._extension._FFMPEG_EXT.get_output_devices()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -223,7 +223,7 @@ def get_input_protocols() -> List[str]: ...@@ -223,7 +223,7 @@ def get_input_protocols() -> List[str]:
>>> print(get_input_protocols()) >>> print(get_input_protocols())
... ['file', 'ftp', 'hls', 'http','https', 'pipe', 'rtmp', 'tcp', 'tls', 'udp', 'unix'] ... ['file', 'ftp', 'hls', 'http','https', 'pipe', 'rtmp', 'tcp', 'tls', 'udp', 'unix']
""" """
return torchaudio.lib._torchaudio_ffmpeg.get_input_protocols() return torchaudio._extension._FFMPEG_EXT.get_input_protocols()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -237,7 +237,7 @@ def get_output_protocols() -> List[str]: ...@@ -237,7 +237,7 @@ def get_output_protocols() -> List[str]:
>>> print(get_output_protocols()) >>> print(get_output_protocols())
... ['file', 'ftp', 'http', 'https', 'md5', 'pipe', 'prompeg', 'rtmp', 'tee', 'tcp', 'tls', 'udp', 'unix'] ... ['file', 'ftp', 'http', 'https', 'md5', 'pipe', 'prompeg', 'rtmp', 'tee', 'tcp', 'tls', 'udp', 'unix']
""" """
return torchaudio.lib._torchaudio_ffmpeg.get_output_protocols() return torchaudio._extension._FFMPEG_EXT.get_output_protocols()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -251,10 +251,10 @@ def get_build_config() -> str: ...@@ -251,10 +251,10 @@ def get_build_config() -> str:
>>> print(get_build_config()) >>> print(get_build_config())
--prefix=/Users/runner/miniforge3 --cc=arm64-apple-darwin20.0.0-clang --enable-gpl --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-neon --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-libvpx --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1646229390493/_build_env/bin/pkg-config --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1646229390493/_build_env/bin/x86_64-apple-darwin13.4.0-clang # noqa --prefix=/Users/runner/miniforge3 --cc=arm64-apple-darwin20.0.0-clang --enable-gpl --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-neon --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-libvpx --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1646229390493/_build_env/bin/pkg-config --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1646229390493/_build_env/bin/x86_64-apple-darwin13.4.0-clang # noqa
""" """
return torchaudio.lib._torchaudio_ffmpeg.get_build_config() return torchaudio._extension._FFMPEG_EXT.get_build_config()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
def clear_cuda_context_cache(): def clear_cuda_context_cache():
"""Clear the CUDA context used by CUDA Hardware accelerated video decoding""" """Clear the CUDA context used by CUDA Hardware accelerated video decoding"""
torchaudio.lib._torchaudio_ffmpeg.clear_cuda_context_cache() torchaudio._extension._FFMPEG_EXT.clear_cuda_context_cache()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment