Commit 61c31bc0 authored by moto's avatar moto Committed by Facebook GitHub Bot
Browse files

Migrate the binding of FFmpeg utils to PyBind11 (#3228)

Summary:
Utility functions are only exposed to Python, so there is no need to use TorchBind for them.
This should allow us to remove the link-whole flag when linking `libtorchaudio_ffmpeg`.

Pull Request resolved: https://github.com/pytorch/audio/pull/3228

Reviewed By: nateanl

Differential Revision: D44639560

Pulled By: mthrok

fbshipit-source-id: 5116073ee8c5ab572c63ad123942c4826bfe1100
parent c22cd167
...@@ -88,9 +88,9 @@ def _init_ffmpeg(): ...@@ -88,9 +88,9 @@ def _init_ffmpeg():
import torchaudio.lib._torchaudio_ffmpeg # noqa import torchaudio.lib._torchaudio_ffmpeg # noqa
torch.ops.torchaudio.ffmpeg_init() torchaudio.lib._torchaudio_ffmpeg.init()
if torch.ops.torchaudio.ffmpeg_get_log_level() > 8: if torchaudio.lib._torchaudio_ffmpeg.get_log_level() > 8:
torch.ops.torchaudio.ffmpeg_set_log_level(8) torchaudio.lib._torchaudio_ffmpeg.set_log_level(8)
def _init_dll_path(): def _init_dll_path():
......
...@@ -21,7 +21,6 @@ set( ...@@ -21,7 +21,6 @@ set(
stream_writer/stream_writer.cpp stream_writer/stream_writer.cpp
stream_writer/tensor_converter.cpp stream_writer/tensor_converter.cpp
compat.cpp compat.cpp
utils.cpp
) )
if (USE_CUDA) if (USE_CUDA)
......
...@@ -8,6 +8,97 @@ namespace torchaudio { ...@@ -8,6 +8,97 @@ namespace torchaudio {
namespace io { namespace io {
namespace { namespace {
// Collects the runtime version of each linked FFmpeg library.
//
// Returns a map keyed by library name ("libavutil", "libavcodec", ...) whose
// values are (major, minor, micro) tuples decoded from the packed integer
// returned by the library's own `*_version()` function.
std::map<std::string, std::tuple<int64_t, int64_t, int64_t>> get_versions() {
  std::map<std::string, std::tuple<int64_t, int64_t, int64_t>> versions;
  // Splits one packed version number into its components and records it.
  const auto record = [&versions](const char* library, int packed) {
    versions.emplace(
        library,
        std::make_tuple(
            AV_VERSION_MAJOR(packed),
            AV_VERSION_MINOR(packed),
            AV_VERSION_MICRO(packed)));
  };
  record("libavutil", avutil_version());
  record("libavcodec", avcodec_version());
  record("libavformat", avformat_version());
  record("libavfilter", avfilter_version());
  record("libavdevice", avdevice_version());
  return versions;
}
// Lists the input formats enumerated by `av_demuxer_iterate`.
//
// @param req_device  When true, only formats whose private class is
//                    categorized as an input device are returned; when false,
//                    only the remaining (non-device) formats are returned.
// @return Map from the format's short name to its long name. A format that
//         has no long name is mapped to an empty string.
std::map<std::string, std::string> get_demuxers(bool req_device) {
  std::map<std::string, std::string> ret;
  const AVInputFormat* fmt = nullptr;
  void* i = nullptr;
  // The loop condition already guarantees `fmt` is non-null inside the body.
  while ((fmt = av_demuxer_iterate(&i))) {
    const AVClass* avclass = fmt->priv_class;
    const bool is_device = avclass && AV_IS_INPUT_DEVICE(avclass->category);
    // `name` and `long_name` are plain C strings. Constructing a std::string
    // from a null pointer is undefined behavior, so guard both the same way
    // get_codecs guards `c->name` / `c->long_name`.
    if (req_device == is_device && fmt->name) {
      ret.emplace(fmt->name, fmt->long_name ? fmt->long_name : "");
    }
  }
  return ret;
}
// Lists the output formats enumerated by `av_muxer_iterate`.
//
// @param req_device  When true, only formats whose private class is
//                    categorized as an output device are returned; when false,
//                    only the remaining (non-device) formats are returned.
// @return Map from the format's short name to its long name. A format that
//         has no long name is mapped to an empty string.
std::map<std::string, std::string> get_muxers(bool req_device) {
  std::map<std::string, std::string> ret;
  const AVOutputFormat* fmt = nullptr;
  void* i = nullptr;
  // The loop condition already guarantees `fmt` is non-null inside the body.
  while ((fmt = av_muxer_iterate(&i))) {
    const AVClass* avclass = fmt->priv_class;
    const bool is_device = avclass && AV_IS_OUTPUT_DEVICE(avclass->category);
    // `name` and `long_name` are plain C strings. Constructing a std::string
    // from a null pointer is undefined behavior, so guard both the same way
    // get_codecs guards `c->name` / `c->long_name`.
    if (req_device == is_device && fmt->name) {
      ret.emplace(fmt->name, fmt->long_name ? fmt->long_name : "");
    }
  }
  return ret;
}
// Lists the codecs enumerated by `av_codec_iterate` that match a media type
// and a role.
//
// @param type         Media type a codec must have to be included.
// @param req_encoder  When true, only encoders are returned; when false, only
//                     decoders are returned.
// @return Map from codec name to its long name (empty string when the codec
//         has no long name). Codecs without a name are skipped.
std::map<std::string, std::string> get_codecs(
    AVMediaType type,
    bool req_encoder) {
  std::map<std::string, std::string> found;
  void* iter_state = nullptr;
  for (const AVCodec* codec = av_codec_iterate(&iter_state); codec;
       codec = av_codec_iterate(&iter_state)) {
    // Only query the predicate that corresponds to the requested role.
    const bool role_matches =
        req_encoder ? av_codec_is_encoder(codec) : av_codec_is_decoder(codec);
    if (!role_matches) {
      continue;
    }
    if (codec->type != type || !codec->name) {
      continue;
    }
    const char* description = codec->long_name ? codec->long_name : "";
    found.emplace(codec->name, description);
  }
  return found;
}
// Lists the protocol names enumerated by `avio_enum_protocols`.
//
// @param output  Forwarded to `avio_enum_protocols` to select which set of
//                protocols is enumerated.
// @return Protocol names in the order the enumeration yields them.
std::vector<std::string> get_protocols(bool output) {
  std::vector<std::string> names;
  void* iter_state = nullptr;
  // The enumeration signals its end by returning a null name.
  for (const char* proto = avio_enum_protocols(&iter_state, output); proto;
       proto = avio_enum_protocols(&iter_state, output)) {
    names.emplace_back(proto);
  }
  return names;
}
// Returns a copy of the string reported by `avcodec_configuration()`.
std::string get_build_config() {
  const char* config = avcodec_configuration();
  return std::string(config);
}
// The reason we inherit FileObj instead of making it an attribute // The reason we inherit FileObj instead of making it an attribute
// is so that FileObj is instantiated first. // is so that FileObj is instantiated first.
// AVIOContext must be initialized before AVFormat, and outlive AVFormat. // AVIOContext must be initialized before AVFormat, and outlive AVFormat.
...@@ -31,7 +122,31 @@ struct StreamWriterFileObj : private FileObj, public StreamWriter { ...@@ -31,7 +122,31 @@ struct StreamWriterFileObj : private FileObj, public StreamWriter {
}; };
PYBIND11_MODULE(_torchaudio_ffmpeg, m) { PYBIND11_MODULE(_torchaudio_ffmpeg, m) {
m.def("init", []() { avdevice_register_all(); });
m.def("get_log_level", []() { return av_log_get_level(); });
m.def("set_log_level", [](int level) { av_log_set_level(level); });
m.def("get_versions", &get_versions);
m.def("get_muxers", []() { return get_muxers(false); });
m.def("get_demuxers", []() { return get_demuxers(false); });
m.def("get_input_devices", []() { return get_demuxers(true); });
m.def("get_build_config", &get_build_config);
m.def("get_output_devices", []() { return get_muxers(true); });
m.def("get_audio_decoders", []() {
return get_codecs(AVMEDIA_TYPE_AUDIO, false);
});
m.def("get_audio_encoders", []() {
return get_codecs(AVMEDIA_TYPE_AUDIO, true);
});
m.def("get_video_decoders", []() {
return get_codecs(AVMEDIA_TYPE_VIDEO, false);
});
m.def("get_video_encoders", []() {
return get_codecs(AVMEDIA_TYPE_VIDEO, true);
});
m.def("get_input_protocols", []() { return get_protocols(false); });
m.def("get_output_protocols", []() { return get_protocols(true); });
m.def("clear_cuda_context_cache", &clear_cuda_context_cache); m.def("clear_cuda_context_cache", &clear_cuda_context_cache);
py::class_<Chunk>(m, "Chunk", py::module_local()) py::class_<Chunk>(m, "Chunk", py::module_local())
.def_readwrite("frames", &Chunk::frames) .def_readwrite("frames", &Chunk::frames)
.def_readwrite("pts", &Chunk::pts); .def_readwrite("pts", &Chunk::pts);
......
#include <torch/script.h>
#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
namespace torchaudio {
namespace io {
namespace {
// Collects the runtime version of each linked FFmpeg library.
//
// Returns a dictionary keyed by library name ("libavutil", "libavcodec", ...)
// whose values are (major, minor, micro) tuples decoded from the packed
// integer returned by the library's own `*_version()` function.
c10::Dict<std::string, std::tuple<int64_t, int64_t, int64_t>> get_versions() {
  c10::Dict<std::string, std::tuple<int64_t, int64_t, int64_t>> versions;
  // Splits one packed version number into its components and records it.
  const auto record = [&versions](const char* library, int packed) {
    versions.insert(
        library,
        std::make_tuple(
            AV_VERSION_MAJOR(packed),
            AV_VERSION_MINOR(packed),
            AV_VERSION_MICRO(packed)));
  };
  record("libavutil", avutil_version());
  record("libavcodec", avcodec_version());
  record("libavformat", avformat_version());
  record("libavfilter", avfilter_version());
  record("libavdevice", avdevice_version());
  return versions;
}
// Lists the input formats enumerated by `av_demuxer_iterate`.
//
// @param req_device  When true, only formats whose private class is
//                    categorized as an input device are returned; when false,
//                    only the remaining (non-device) formats are returned.
// @return Dictionary from the format's short name to its long name. A format
//         that has no long name is mapped to an empty string.
c10::Dict<std::string, std::string> get_demuxers(bool req_device) {
  c10::Dict<std::string, std::string> ret;
  const AVInputFormat* fmt = nullptr;
  void* i = nullptr;
  // The loop condition already guarantees `fmt` is non-null inside the body.
  while ((fmt = av_demuxer_iterate(&i))) {
    const AVClass* avclass = fmt->priv_class;
    const bool is_device = avclass && AV_IS_INPUT_DEVICE(avclass->category);
    // `name` and `long_name` are plain C strings. Constructing a std::string
    // from a null pointer is undefined behavior, so guard both the same way
    // get_codecs guards `c->name` / `c->long_name`.
    if (req_device == is_device && fmt->name) {
      ret.insert(fmt->name, fmt->long_name ? fmt->long_name : "");
    }
  }
  return ret;
}
// Lists the output formats enumerated by `av_muxer_iterate`.
//
// @param req_device  When true, only formats whose private class is
//                    categorized as an output device are returned; when false,
//                    only the remaining (non-device) formats are returned.
// @return Dictionary from the format's short name to its long name. A format
//         that has no long name is mapped to an empty string.
c10::Dict<std::string, std::string> get_muxers(bool req_device) {
  c10::Dict<std::string, std::string> ret;
  const AVOutputFormat* fmt = nullptr;
  void* i = nullptr;
  // The loop condition already guarantees `fmt` is non-null inside the body.
  while ((fmt = av_muxer_iterate(&i))) {
    const AVClass* avclass = fmt->priv_class;
    const bool is_device = avclass && AV_IS_OUTPUT_DEVICE(avclass->category);
    // `name` and `long_name` are plain C strings. Constructing a std::string
    // from a null pointer is undefined behavior, so guard both the same way
    // get_codecs guards `c->name` / `c->long_name`.
    if (req_device == is_device && fmt->name) {
      ret.insert(fmt->name, fmt->long_name ? fmt->long_name : "");
    }
  }
  return ret;
}
// Lists the codecs enumerated by `av_codec_iterate` that match a media type
// and a role.
//
// @param type         Media type a codec must have to be included.
// @param req_encoder  When true, only encoders are returned; when false, only
//                     decoders are returned.
// @return Dictionary from codec name to its long name (empty string when the
//         codec has no long name). Codecs without a name are skipped.
c10::Dict<std::string, std::string> get_codecs(
    AVMediaType type,
    bool req_encoder) {
  c10::Dict<std::string, std::string> found;
  void* iter_state = nullptr;
  for (const AVCodec* codec = av_codec_iterate(&iter_state); codec;
       codec = av_codec_iterate(&iter_state)) {
    // Only query the predicate that corresponds to the requested role.
    const bool role_matches =
        req_encoder ? av_codec_is_encoder(codec) : av_codec_is_decoder(codec);
    if (!role_matches) {
      continue;
    }
    if (codec->type != type || !codec->name) {
      continue;
    }
    const char* description = codec->long_name ? codec->long_name : "";
    found.insert(codec->name, description);
  }
  return found;
}
// Lists the protocol names enumerated by `avio_enum_protocols`.
//
// @param output  Forwarded to `avio_enum_protocols` to select which set of
//                protocols is enumerated.
// @return Protocol names in the order the enumeration yields them.
std::vector<std::string> get_protocols(bool output) {
  std::vector<std::string> names;
  void* iter_state = nullptr;
  // The enumeration signals its end by returning a null name.
  for (const char* proto = avio_enum_protocols(&iter_state, output); proto;
       proto = avio_enum_protocols(&iter_state, output)) {
    names.emplace_back(proto);
  }
  return names;
}
// Returns a copy of the string reported by `avcodec_configuration()`.
std::string get_build_config() {
  const char* config = avcodec_configuration();
  return std::string(config);
}
// Registers the FFmpeg utility functions as operators in the `torchaudio`
// library fragment. Every operator name carries the `ffmpeg_` prefix and each
// one forwards to an FFmpeg call or to one of the helpers defined above.
TORCH_LIBRARY_FRAGMENT(torchaudio, m) {
// Initialization: registers all devices via avdevice_register_all().
m.def("torchaudio::ffmpeg_init", []() { avdevice_register_all(); });
// Log level accessors. The value is converted between FFmpeg's `int` and
// `int64_t` at the operator boundary.
// NOTE(review): presumably int64_t is used because it is the integer type the
// operator schema accepts - confirm.
m.def("torchaudio::ffmpeg_get_log_level", []() -> int64_t {
return static_cast<int64_t>(av_log_get_level());
});
m.def("torchaudio::ffmpeg_set_log_level", [](int64_t level) {
av_log_set_level(static_cast<int>(level));
});
// Library versions as (major, minor, micro) tuples keyed by library name.
m.def("torchaudio::ffmpeg_get_versions", &get_versions);
// Formats: the `false` argument selects non-device formats, `true` selects
// device formats.
m.def("torchaudio::ffmpeg_get_muxers", []() { return get_muxers(false); });
m.def(
"torchaudio::ffmpeg_get_demuxers", []() { return get_demuxers(false); });
m.def("torchaudio::ffmpeg_get_input_devices", []() {
return get_demuxers(true);
});
m.def("torchaudio::ffmpeg_get_build_config", []() {
return get_build_config();
});
m.def("torchaudio::ffmpeg_get_output_devices", []() {
return get_muxers(true);
});
// Codecs: the second argument of get_codecs is `req_encoder`
// (false = decoders, true = encoders).
m.def("torchaudio::ffmpeg_get_audio_decoders", []() {
return get_codecs(AVMEDIA_TYPE_AUDIO, false);
});
m.def("torchaudio::ffmpeg_get_audio_encoders", []() {
return get_codecs(AVMEDIA_TYPE_AUDIO, true);
});
m.def("torchaudio::ffmpeg_get_video_decoders", []() {
return get_codecs(AVMEDIA_TYPE_VIDEO, false);
});
m.def("torchaudio::ffmpeg_get_video_encoders", []() {
return get_codecs(AVMEDIA_TYPE_VIDEO, true);
});
// Protocols: the argument of get_protocols is `output`
// (false = input protocols, true = output protocols).
m.def("torchaudio::ffmpeg_get_input_protocols", []() {
return get_protocols(false);
});
m.def("torchaudio::ffmpeg_get_output_protocols", []() {
return get_protocols(true);
});
}
} // namespace
} // namespace io
} // namespace torchaudio
...@@ -16,7 +16,7 @@ def get_versions() -> Dict[str, Tuple[int]]: ...@@ -16,7 +16,7 @@ def get_versions() -> Dict[str, Tuple[int]]:
dict: mapping from library names to version tuple, dict: mapping from library names to version tuple,
i.e. `"libavutil": (56, 22, 100)`. i.e. `"libavutil": (56, 22, 100)`.
""" """
return torch.ops.torchaudio.ffmpeg_get_versions() return torchaudio.lib._torchaudio_ffmpeg.get_versions()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -25,7 +25,7 @@ def get_log_level() -> int: ...@@ -25,7 +25,7 @@ def get_log_level() -> int:
See :py:func:`set_log_level` for the details. See :py:func:`set_log_level` for the details.
""" """
return torch.ops.torchaudio.ffmpeg_get_log_level() return torchaudio.lib._torchaudio_ffmpeg.get_log_level()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -62,7 +62,7 @@ def set_log_level(level: int): ...@@ -62,7 +62,7 @@ def set_log_level(level: int):
Extremely verbose debugging, useful for libav* development. Extremely verbose debugging, useful for libav* development.
""" """
torch.ops.torchaudio.ffmpeg_set_log_level(level) torchaudio.lib._torchaudio_ffmpeg.set_log_level(level)
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -80,7 +80,7 @@ def get_demuxers() -> Dict[str, str]: ...@@ -80,7 +80,7 @@ def get_demuxers() -> Dict[str, str]:
... aax: CRI AAX ... aax: CRI AAX
... ac3: raw AC-3 ... ac3: raw AC-3
""" """
return torch.ops.torchaudio.ffmpeg_get_demuxers() return torchaudio.lib._torchaudio_ffmpeg.get_demuxers()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -99,7 +99,7 @@ def get_muxers() -> Dict[str, str]: ...@@ -99,7 +99,7 @@ def get_muxers() -> Dict[str, str]:
... adx: CRI ADX ... adx: CRI ADX
... aiff: Audio IFF ... aiff: Audio IFF
""" """
return torch.ops.torchaudio.ffmpeg_get_muxers() return torchaudio.lib._torchaudio_ffmpeg.get_muxers()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -118,7 +118,7 @@ def get_audio_decoders() -> Dict[str, str]: ...@@ -118,7 +118,7 @@ def get_audio_decoders() -> Dict[str, str]:
... adx: CRI ADX ... adx: CRI ADX
... aiff: Audio IFF ... aiff: Audio IFF
""" """
return torch.ops.torchaudio.ffmpeg_get_audio_decoders() return torchaudio.lib._torchaudio_ffmpeg.get_audio_decoders()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -138,7 +138,7 @@ def get_audio_encoders() -> Dict[str, str]: ...@@ -138,7 +138,7 @@ def get_audio_encoders() -> Dict[str, str]:
... ac3_fixed: ATSC A/52A (AC-3) ... ac3_fixed: ATSC A/52A (AC-3)
... alac: ALAC (Apple Lossless Audio Codec) ... alac: ALAC (Apple Lossless Audio Codec)
""" """
return torch.ops.torchaudio.ffmpeg_get_audio_encoders() return torchaudio.lib._torchaudio_ffmpeg.get_audio_encoders()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -158,7 +158,7 @@ def get_video_decoders() -> Dict[str, str]: ...@@ -158,7 +158,7 @@ def get_video_decoders() -> Dict[str, str]:
... amv: AMV Video ... amv: AMV Video
... anm: Deluxe Paint Animation ... anm: Deluxe Paint Animation
""" """
return torch.ops.torchaudio.ffmpeg_get_video_decoders() return torchaudio.lib._torchaudio_ffmpeg.get_video_decoders()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -179,7 +179,7 @@ def get_video_encoders() -> Dict[str, str]: ...@@ -179,7 +179,7 @@ def get_video_encoders() -> Dict[str, str]:
... asv1: ASUS V1 ... asv1: ASUS V1
... asv2: ASUS V2 ... asv2: ASUS V2
""" """
return torch.ops.torchaudio.ffmpeg_get_video_encoders() return torchaudio.lib._torchaudio_ffmpeg.get_video_encoders()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -195,7 +195,7 @@ def get_input_devices() -> Dict[str, str]: ...@@ -195,7 +195,7 @@ def get_input_devices() -> Dict[str, str]:
... avfoundation: AVFoundation input device ... avfoundation: AVFoundation input device
... lavfi: Libavfilter virtual input device ... lavfi: Libavfilter virtual input device
""" """
return torch.ops.torchaudio.ffmpeg_get_input_devices() return torchaudio.lib._torchaudio_ffmpeg.get_input_devices()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -210,7 +210,7 @@ def get_output_devices() -> Dict[str, str]: ...@@ -210,7 +210,7 @@ def get_output_devices() -> Dict[str, str]:
>>> print(f"{k}: {v}") >>> print(f"{k}: {v}")
... audiotoolbox: AudioToolbox output device ... audiotoolbox: AudioToolbox output device
""" """
return torch.ops.torchaudio.ffmpeg_get_output_devices() return torchaudio.lib._torchaudio_ffmpeg.get_output_devices()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -224,7 +224,7 @@ def get_input_protocols() -> List[str]: ...@@ -224,7 +224,7 @@ def get_input_protocols() -> List[str]:
>>> print(get_input_protocols()) >>> print(get_input_protocols())
... ['file', 'ftp', 'hls', 'http','https', 'pipe', 'rtmp', 'tcp', 'tls', 'udp', 'unix'] ... ['file', 'ftp', 'hls', 'http','https', 'pipe', 'rtmp', 'tcp', 'tls', 'udp', 'unix']
""" """
return torch.ops.torchaudio.ffmpeg_get_input_protocols() return torchaudio.lib._torchaudio_ffmpeg.get_input_protocols()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -238,7 +238,7 @@ def get_output_protocols() -> List[str]: ...@@ -238,7 +238,7 @@ def get_output_protocols() -> List[str]:
>>> print(get_output_protocols()) >>> print(get_output_protocols())
... ['file', 'ftp', 'http', 'https', 'md5', 'pipe', 'prompeg', 'rtmp', 'tee', 'tcp', 'tls', 'udp', 'unix'] ... ['file', 'ftp', 'http', 'https', 'md5', 'pipe', 'prompeg', 'rtmp', 'tee', 'tcp', 'tls', 'udp', 'unix']
""" """
return torch.ops.torchaudio.ffmpeg_get_output_protocols() return torchaudio.lib._torchaudio_ffmpeg.get_output_protocols()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
...@@ -252,7 +252,7 @@ def get_build_config() -> str: ...@@ -252,7 +252,7 @@ def get_build_config() -> str:
>>> print(get_build_config()) >>> print(get_build_config())
--prefix=/Users/runner/miniforge3 --cc=arm64-apple-darwin20.0.0-clang --enable-gpl --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-neon --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-libvpx --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1646229390493/_build_env/bin/pkg-config --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1646229390493/_build_env/bin/x86_64-apple-darwin13.4.0-clang # noqa --prefix=/Users/runner/miniforge3 --cc=arm64-apple-darwin20.0.0-clang --enable-gpl --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-neon --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-libvpx --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1646229390493/_build_env/bin/pkg-config --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1646229390493/_build_env/bin/x86_64-apple-darwin13.4.0-clang # noqa
""" """
return torch.ops.torchaudio.ffmpeg_get_build_config() return torchaudio.lib._torchaudio_ffmpeg.get_build_config()
@torchaudio._extension.fail_if_no_ffmpeg @torchaudio._extension.fail_if_no_ffmpeg
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment