Commit ca66a1d3 authored by moto's avatar moto Committed by Facebook GitHub Bot
Browse files

Revert "[audio][PR] Add option to dlopen FFmpeg libraries (#3402)" (#3456)

Summary:
This reverts commit b7d3e89a.

We will use pre-built binaries instead of dlopen.

Pull Request resolved: https://github.com/pytorch/audio/pull/3456

Differential Revision: D47239681

Pulled By: mthrok

fbshipit-source-id: 0446a62410d914081184fc20c386afa00b1e41b6
parent 662f067b
...@@ -37,7 +37,6 @@ _BUILD_SOX = False if platform.system() == "Windows" else _get_build("BUILD_SOX" ...@@ -37,7 +37,6 @@ _BUILD_SOX = False if platform.system() == "Windows" else _get_build("BUILD_SOX"
_BUILD_RIR = _get_build("BUILD_RIR", True) _BUILD_RIR = _get_build("BUILD_RIR", True)
_BUILD_RNNT = _get_build("BUILD_RNNT", True) _BUILD_RNNT = _get_build("BUILD_RNNT", True)
_USE_FFMPEG = _get_build("USE_FFMPEG", False) _USE_FFMPEG = _get_build("USE_FFMPEG", False)
_DLOPEN_FFMPEG = _get_build("DLOPEN_FFMPEG", False)
_USE_ROCM = _get_build("USE_ROCM", torch.backends.cuda.is_built() and torch.version.hip is not None) _USE_ROCM = _get_build("USE_ROCM", torch.backends.cuda.is_built() and torch.version.hip is not None)
_USE_CUDA = _get_build("USE_CUDA", torch.backends.cuda.is_built() and torch.version.hip is None) _USE_CUDA = _get_build("USE_CUDA", torch.backends.cuda.is_built() and torch.version.hip is None)
_BUILD_ALIGN = _get_build("BUILD_ALIGN", True) _BUILD_ALIGN = _get_build("BUILD_ALIGN", True)
...@@ -125,7 +124,6 @@ class CMakeBuild(build_ext): ...@@ -125,7 +124,6 @@ class CMakeBuild(build_ext):
f"-DUSE_CUDA:BOOL={'ON' if _USE_CUDA else 'OFF'}", f"-DUSE_CUDA:BOOL={'ON' if _USE_CUDA else 'OFF'}",
f"-DUSE_OPENMP:BOOL={'ON' if _USE_OPENMP else 'OFF'}", f"-DUSE_OPENMP:BOOL={'ON' if _USE_OPENMP else 'OFF'}",
f"-DUSE_FFMPEG:BOOL={'ON' if _USE_FFMPEG else 'OFF'}", f"-DUSE_FFMPEG:BOOL={'ON' if _USE_FFMPEG else 'OFF'}",
f"-DDLOPEN_FFMPEG:BOOL={'ON' if _DLOPEN_FFMPEG else 'OFF'}",
] ]
build_args = ["--target", "install"] build_args = ["--target", "install"]
# Pass CUDA architecture to cmake # Pass CUDA architecture to cmake
......
...@@ -2,13 +2,11 @@ message(STATUS "FFMPEG_ROOT=$ENV{FFMPEG_ROOT}") ...@@ -2,13 +2,11 @@ message(STATUS "FFMPEG_ROOT=$ENV{FFMPEG_ROOT}")
find_package(FFMPEG 4.1 REQUIRED COMPONENTS avdevice avfilter avformat avcodec avutil) find_package(FFMPEG 4.1 REQUIRED COMPONENTS avdevice avfilter avformat avcodec avutil)
add_library(ffmpeg INTERFACE) add_library(ffmpeg INTERFACE)
target_include_directories(ffmpeg INTERFACE "${FFMPEG_INCLUDE_DIRS}") target_include_directories(ffmpeg INTERFACE "${FFMPEG_INCLUDE_DIRS}")
if (NOT DLOPEN_FFMPEG)
target_link_libraries(ffmpeg INTERFACE "${FFMPEG_LIBRARIES}") target_link_libraries(ffmpeg INTERFACE "${FFMPEG_LIBRARIES}")
endif()
set( set(
sources sources
stub.cpp
ffmpeg.cpp ffmpeg.cpp
filter_graph.cpp filter_graph.cpp
hw_context.cpp hw_context.cpp
...@@ -33,24 +31,24 @@ if (USE_CUDA) ...@@ -33,24 +31,24 @@ if (USE_CUDA)
cuda_deps) cuda_deps)
endif() endif()
if (DLOPEN_FFMPEG)
set(compile_definitions DLOPEN_FFMPEG)
endif()
torchaudio_library( torchaudio_library(
libtorchaudio_ffmpeg libtorchaudio_ffmpeg
"${sources}" "${sources}"
"" ""
"torch;ffmpeg;${additional_lib}" "torch;ffmpeg;${additional_lib}"
"${compile_definitions}" ""
) )
if (BUILD_TORCHAUDIO_PYTHON_EXTENSION) if (BUILD_TORCHAUDIO_PYTHON_EXTENSION)
set(
ext_sources
pybind/pybind.cpp
)
torchaudio_extension( torchaudio_extension(
_torchaudio_ffmpeg _torchaudio_ffmpeg
pybind/pybind.cpp "${ext_sources}"
"" ""
"libtorchaudio_ffmpeg" "libtorchaudio_ffmpeg"
"${compile_definitions}" ""
) )
endif () endif ()
#include <c10/util/Exception.h> #include <c10/util/Exception.h>
#include <torchaudio/csrc/ffmpeg/ffmpeg.h> #include <torchaudio/csrc/ffmpeg/ffmpeg.h>
#include <torchaudio/csrc/ffmpeg/stub.h>
#include <sstream> #include <sstream>
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
...@@ -8,12 +7,6 @@ ...@@ -8,12 +7,6 @@
namespace torchaudio::io { namespace torchaudio::io {
std::string av_err2string(int errnum) {
char str[AV_ERROR_MAX_STRING_SIZE];
FFMPEG av_strerror(errnum, str, AV_ERROR_MAX_STRING_SIZE);
return str;
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// AVDictionary // AVDictionary
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
...@@ -21,7 +14,7 @@ AVDictionary* get_option_dict(const c10::optional<OptionDict>& option) { ...@@ -21,7 +14,7 @@ AVDictionary* get_option_dict(const c10::optional<OptionDict>& option) {
AVDictionary* opt = nullptr; AVDictionary* opt = nullptr;
if (option) { if (option) {
for (auto const& [key, value] : option.value()) { for (auto const& [key, value] : option.value()) {
FFMPEG av_dict_set(&opt, key.c_str(), value.c_str(), 0); av_dict_set(&opt, key.c_str(), value.c_str(), 0);
} }
} }
return opt; return opt;
...@@ -32,10 +25,10 @@ void clean_up_dict(AVDictionary* p) { ...@@ -32,10 +25,10 @@ void clean_up_dict(AVDictionary* p) {
std::vector<std::string> unused_keys; std::vector<std::string> unused_keys;
// Check and copy unused keys, clean up the original dictionary // Check and copy unused keys, clean up the original dictionary
AVDictionaryEntry* t = nullptr; AVDictionaryEntry* t = nullptr;
while ((t = FFMPEG av_dict_get(p, "", t, AV_DICT_IGNORE_SUFFIX))) { while ((t = av_dict_get(p, "", t, AV_DICT_IGNORE_SUFFIX))) {
unused_keys.emplace_back(t->key); unused_keys.emplace_back(t->key);
} }
FFMPEG av_dict_free(&p); av_dict_free(&p);
TORCH_CHECK( TORCH_CHECK(
unused_keys.empty(), unused_keys.empty(),
"Unexpected options: ", "Unexpected options: ",
...@@ -47,14 +40,14 @@ void clean_up_dict(AVDictionary* p) { ...@@ -47,14 +40,14 @@ void clean_up_dict(AVDictionary* p) {
// AVFormatContext // AVFormatContext
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void AVFormatInputContextDeleter::operator()(AVFormatContext* p) { void AVFormatInputContextDeleter::operator()(AVFormatContext* p) {
FFMPEG avformat_close_input(&p); avformat_close_input(&p);
}; };
AVFormatInputContextPtr::AVFormatInputContextPtr(AVFormatContext* p) AVFormatInputContextPtr::AVFormatInputContextPtr(AVFormatContext* p)
: Wrapper<AVFormatContext, AVFormatInputContextDeleter>(p) {} : Wrapper<AVFormatContext, AVFormatInputContextDeleter>(p) {}
void AVFormatOutputContextDeleter::operator()(AVFormatContext* p) { void AVFormatOutputContextDeleter::operator()(AVFormatContext* p) {
FFMPEG avformat_free_context(p); avformat_free_context(p);
}; };
AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p) AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p)
...@@ -64,9 +57,9 @@ AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p) ...@@ -64,9 +57,9 @@ AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p)
// AVIO // AVIO
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void AVIOContextDeleter::operator()(AVIOContext* p) { void AVIOContextDeleter::operator()(AVIOContext* p) {
FFMPEG avio_flush(p); avio_flush(p);
FFMPEG av_freep(&p->buffer); av_freep(&p->buffer);
FFMPEG av_freep(&p); av_freep(&p);
}; };
AVIOContextPtr::AVIOContextPtr(AVIOContext* p) AVIOContextPtr::AVIOContextPtr(AVIOContext* p)
...@@ -76,13 +69,13 @@ AVIOContextPtr::AVIOContextPtr(AVIOContext* p) ...@@ -76,13 +69,13 @@ AVIOContextPtr::AVIOContextPtr(AVIOContext* p)
// AVPacket // AVPacket
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void AVPacketDeleter::operator()(AVPacket* p) { void AVPacketDeleter::operator()(AVPacket* p) {
FFMPEG av_packet_free(&p); av_packet_free(&p);
}; };
AVPacketPtr::AVPacketPtr(AVPacket* p) : Wrapper<AVPacket, AVPacketDeleter>(p) {} AVPacketPtr::AVPacketPtr(AVPacket* p) : Wrapper<AVPacket, AVPacketDeleter>(p) {}
AVPacketPtr alloc_avpacket() { AVPacketPtr alloc_avpacket() {
AVPacket* p = FFMPEG av_packet_alloc(); AVPacket* p = av_packet_alloc();
TORCH_CHECK(p, "Failed to allocate AVPacket object."); TORCH_CHECK(p, "Failed to allocate AVPacket object.");
return AVPacketPtr{p}; return AVPacketPtr{p};
} }
...@@ -92,7 +85,7 @@ AVPacketPtr alloc_avpacket() { ...@@ -92,7 +85,7 @@ AVPacketPtr alloc_avpacket() {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
AutoPacketUnref::AutoPacketUnref(AVPacketPtr& p) : p_(p){}; AutoPacketUnref::AutoPacketUnref(AVPacketPtr& p) : p_(p){};
AutoPacketUnref::~AutoPacketUnref() { AutoPacketUnref::~AutoPacketUnref() {
FFMPEG av_packet_unref(p_); av_packet_unref(p_);
} }
AutoPacketUnref::operator AVPacket*() const { AutoPacketUnref::operator AVPacket*() const {
return p_; return p_;
...@@ -102,13 +95,13 @@ AutoPacketUnref::operator AVPacket*() const { ...@@ -102,13 +95,13 @@ AutoPacketUnref::operator AVPacket*() const {
// AVFrame // AVFrame
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void AVFrameDeleter::operator()(AVFrame* p) { void AVFrameDeleter::operator()(AVFrame* p) {
FFMPEG av_frame_free(&p); av_frame_free(&p);
}; };
AVFramePtr::AVFramePtr(AVFrame* p) : Wrapper<AVFrame, AVFrameDeleter>(p) {} AVFramePtr::AVFramePtr(AVFrame* p) : Wrapper<AVFrame, AVFrameDeleter>(p) {}
AVFramePtr alloc_avframe() { AVFramePtr alloc_avframe() {
AVFrame* p = FFMPEG av_frame_alloc(); AVFrame* p = av_frame_alloc();
TORCH_CHECK(p, "Failed to allocate AVFrame object."); TORCH_CHECK(p, "Failed to allocate AVFrame object.");
return AVFramePtr{p}; return AVFramePtr{p};
}; };
...@@ -117,7 +110,7 @@ AVFramePtr alloc_avframe() { ...@@ -117,7 +110,7 @@ AVFramePtr alloc_avframe() {
// AVCodecContext // AVCodecContext
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void AVCodecContextDeleter::operator()(AVCodecContext* p) { void AVCodecContextDeleter::operator()(AVCodecContext* p) {
FFMPEG avcodec_free_context(&p); avcodec_free_context(&p);
}; };
AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p) AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
...@@ -127,7 +120,7 @@ AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p) ...@@ -127,7 +120,7 @@ AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
// AVBufferRefPtr // AVBufferRefPtr
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void AutoBufferUnref::operator()(AVBufferRef* p) { void AutoBufferUnref::operator()(AVBufferRef* p) {
FFMPEG av_buffer_unref(&p); av_buffer_unref(&p);
} }
AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p) AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p)
...@@ -137,7 +130,7 @@ AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p) ...@@ -137,7 +130,7 @@ AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p)
// AVFilterGraph // AVFilterGraph
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void AVFilterGraphDeleter::operator()(AVFilterGraph* p) { void AVFilterGraphDeleter::operator()(AVFilterGraph* p) {
FFMPEG avfilter_graph_free(&p); avfilter_graph_free(&p);
}; };
AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p) AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p)
...@@ -147,7 +140,7 @@ AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p) ...@@ -147,7 +140,7 @@ AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p)
// AVCodecParameters // AVCodecParameters
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void AVCodecParametersDeleter::operator()(AVCodecParameters* codecpar) { void AVCodecParametersDeleter::operator()(AVCodecParameters* codecpar) {
FFMPEG avcodec_parameters_free(&codecpar); avcodec_parameters_free(&codecpar);
} }
AVCodecParametersPtr::AVCodecParametersPtr(AVCodecParameters* p) AVCodecParametersPtr::AVCodecParametersPtr(AVCodecParameters* p)
......
...@@ -41,7 +41,10 @@ using OptionDict = std::map<std::string, std::string>; ...@@ -41,7 +41,10 @@ using OptionDict = std::map<std::string, std::string>;
// Replacement of av_err2str, which causes // Replacement of av_err2str, which causes
// `error: taking address of temporary array` // `error: taking address of temporary array`
// https://github.com/joncampbell123/composite-video-simulator/issues/5 // https://github.com/joncampbell123/composite-video-simulator/issues/5
std::string av_err2string(int errnum); av_always_inline std::string av_err2string(int errnum) {
char str[AV_ERROR_MAX_STRING_SIZE];
return av_make_error_string(str, AV_ERROR_MAX_STRING_SIZE, errnum);
}
// Base structure that handles memory management. // Base structure that handles memory management.
// Resource is freed by the destructor of unique_ptr, // Resource is freed by the destructor of unique_ptr,
......
#include <torchaudio/csrc/ffmpeg/filter_graph.h> #include <torchaudio/csrc/ffmpeg/filter_graph.h>
#include <torchaudio/csrc/ffmpeg/stub.h>
#include <stdexcept> #include <stdexcept>
namespace torchaudio::io { namespace torchaudio::io {
namespace { namespace {
AVFilterGraph* get_filter_graph() { AVFilterGraph* get_filter_graph() {
AVFilterGraph* ptr = FFMPEG avfilter_graph_alloc(); AVFilterGraph* ptr = avfilter_graph_alloc();
TORCH_CHECK(ptr, "Failed to allocate resouce."); TORCH_CHECK(ptr, "Failed to allocate resouce.");
ptr->nb_threads = 1; ptr->nb_threads = 1;
return ptr; return ptr;
...@@ -32,7 +31,7 @@ std::string get_audio_src_args( ...@@ -32,7 +31,7 @@ std::string get_audio_src_args(
time_base.num, time_base.num,
time_base.den, time_base.den,
sample_rate, sample_rate,
FFMPEG av_get_sample_fmt_name(format), av_get_sample_fmt_name(format),
channel_layout); channel_layout);
return std::string(args); return std::string(args);
} }
...@@ -51,7 +50,7 @@ std::string get_video_src_args( ...@@ -51,7 +50,7 @@ std::string get_video_src_args(
"video_size=%dx%d:pix_fmt=%s:time_base=%d/%d:frame_rate=%d/%d:pixel_aspect=%d/%d", "video_size=%dx%d:pix_fmt=%s:time_base=%d/%d:frame_rate=%d/%d:pixel_aspect=%d/%d",
width, width,
height, height,
FFMPEG av_get_pix_fmt_name(format), av_get_pix_fmt_name(format),
time_base.num, time_base.num,
time_base.den, time_base.den,
frame_rate.num, frame_rate.num,
...@@ -69,7 +68,7 @@ void FilterGraph::add_audio_src( ...@@ -69,7 +68,7 @@ void FilterGraph::add_audio_src(
int sample_rate, int sample_rate,
uint64_t channel_layout) { uint64_t channel_layout) {
add_src( add_src(
FFMPEG avfilter_get_by_name("abuffer"), avfilter_get_by_name("abuffer"),
get_audio_src_args(format, time_base, sample_rate, channel_layout)); get_audio_src_args(format, time_base, sample_rate, channel_layout));
} }
...@@ -81,13 +80,13 @@ void FilterGraph::add_video_src( ...@@ -81,13 +80,13 @@ void FilterGraph::add_video_src(
int height, int height,
AVRational sample_aspect_ratio) { AVRational sample_aspect_ratio) {
add_src( add_src(
FFMPEG avfilter_get_by_name("buffer"), avfilter_get_by_name("buffer"),
get_video_src_args( get_video_src_args(
format, time_base, frame_rate, width, height, sample_aspect_ratio)); format, time_base, frame_rate, width, height, sample_aspect_ratio));
} }
void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) { void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) {
int ret = FFMPEG avfilter_graph_create_filter( int ret = avfilter_graph_create_filter(
&buffersrc_ctx, buffersrc, "in", args.c_str(), nullptr, graph); &buffersrc_ctx, buffersrc, "in", args.c_str(), nullptr, graph);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, ret >= 0,
...@@ -96,11 +95,11 @@ void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) { ...@@ -96,11 +95,11 @@ void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) {
} }
void FilterGraph::add_audio_sink() { void FilterGraph::add_audio_sink() {
add_sink(FFMPEG avfilter_get_by_name("abuffersink")); add_sink(avfilter_get_by_name("abuffersink"));
} }
void FilterGraph::add_video_sink() { void FilterGraph::add_video_sink() {
add_sink(FFMPEG avfilter_get_by_name("buffersink")); add_sink(avfilter_get_by_name("buffersink"));
} }
void FilterGraph::add_sink(const AVFilter* buffersink) { void FilterGraph::add_sink(const AVFilter* buffersink) {
...@@ -114,7 +113,7 @@ void FilterGraph::add_sink(const AVFilter* buffersink) { ...@@ -114,7 +113,7 @@ void FilterGraph::add_sink(const AVFilter* buffersink) {
// According to the other example // According to the other example
// https://ffmpeg.org/doxygen/4.1/filter_audio_8c-example.html // https://ffmpeg.org/doxygen/4.1/filter_audio_8c-example.html
// `abuffersink` should not take options, and this resolved issue. // `abuffersink` should not take options, and this resolved issue.
int ret = FFMPEG avfilter_graph_create_filter( int ret = avfilter_graph_create_filter(
&buffersink_ctx, buffersink, "out", nullptr, nullptr, graph); &buffersink_ctx, buffersink, "out", nullptr, nullptr, graph);
TORCH_CHECK(ret >= 0, "Failed to create output filter."); TORCH_CHECK(ret >= 0, "Failed to create output filter.");
} }
...@@ -131,15 +130,15 @@ class InOuts { ...@@ -131,15 +130,15 @@ class InOuts {
public: public:
InOuts(const char* name, AVFilterContext* pCtx) { InOuts(const char* name, AVFilterContext* pCtx) {
p = FFMPEG avfilter_inout_alloc(); p = avfilter_inout_alloc();
TORCH_CHECK(p, "Failed to allocate AVFilterInOut."); TORCH_CHECK(p, "Failed to allocate AVFilterInOut.");
p->name = FFMPEG av_strdup(name); p->name = av_strdup(name);
p->filter_ctx = pCtx; p->filter_ctx = pCtx;
p->pad_idx = 0; p->pad_idx = 0;
p->next = nullptr; p->next = nullptr;
} }
~InOuts() { ~InOuts() {
FFMPEG avfilter_inout_free(&p); avfilter_inout_free(&p);
} }
operator AVFilterInOut**() { operator AVFilterInOut**() {
return &p; return &p;
...@@ -156,7 +155,7 @@ void FilterGraph::add_process(const std::string& filter_description) { ...@@ -156,7 +155,7 @@ void FilterGraph::add_process(const std::string& filter_description) {
// If you are debugging this part of the code, you might get confused. // If you are debugging this part of the code, you might get confused.
InOuts in{"in", buffersrc_ctx}, out{"out", buffersink_ctx}; InOuts in{"in", buffersrc_ctx}, out{"out", buffersink_ctx};
int ret = FFMPEG avfilter_graph_parse_ptr( int ret = avfilter_graph_parse_ptr(
graph, filter_description.c_str(), out, in, nullptr); graph, filter_description.c_str(), out, in, nullptr);
TORCH_CHECK( TORCH_CHECK(
...@@ -167,11 +166,11 @@ void FilterGraph::add_process(const std::string& filter_description) { ...@@ -167,11 +166,11 @@ void FilterGraph::add_process(const std::string& filter_description) {
void FilterGraph::create_filter(AVBufferRef* hw_frames_ctx) { void FilterGraph::create_filter(AVBufferRef* hw_frames_ctx) {
buffersrc_ctx->outputs[0]->hw_frames_ctx = hw_frames_ctx; buffersrc_ctx->outputs[0]->hw_frames_ctx = hw_frames_ctx;
int ret = FFMPEG avfilter_graph_config(graph, nullptr); int ret = avfilter_graph_config(graph, nullptr);
TORCH_CHECK(ret >= 0, "Failed to configure the graph: " + av_err2string(ret)); TORCH_CHECK(ret >= 0, "Failed to configure the graph: " + av_err2string(ret));
// char* desc = FFMPEG avfilter_graph_dump(graph, NULL); // char* desc = avfilter_graph_dump(graph, NULL);
// std::cerr << "Filter created:\n" << desc << std::endl; // std::cerr << "Filter created:\n" << desc << std::endl;
// FFMPEG av_free(static_cast<void*>(desc)); // av_free(static_cast<void*>(desc));
} }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
...@@ -191,8 +190,7 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const { ...@@ -191,8 +190,7 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const {
ret.num_channels = l->ch_layout.nb_channels; ret.num_channels = l->ch_layout.nb_channels;
#else #else
// Before FFmpeg 5.1 // Before FFmpeg 5.1
ret.num_channels = ret.num_channels = av_get_channel_layout_nb_channels(l->channel_layout);
FFMPEG av_get_channel_layout_nb_channels(l->channel_layout);
#endif #endif
break; break;
} }
...@@ -215,12 +213,12 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const { ...@@ -215,12 +213,12 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const {
// Streaming process // Streaming process
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
int FilterGraph::add_frame(AVFrame* pInputFrame) { int FilterGraph::add_frame(AVFrame* pInputFrame) {
return FFMPEG av_buffersrc_add_frame_flags( return av_buffersrc_add_frame_flags(
buffersrc_ctx, pInputFrame, AV_BUFFERSRC_FLAG_KEEP_REF); buffersrc_ctx, pInputFrame, AV_BUFFERSRC_FLAG_KEEP_REF);
} }
int FilterGraph::get_frame(AVFrame* pOutputFrame) { int FilterGraph::get_frame(AVFrame* pOutputFrame) {
return FFMPEG av_buffersink_get_frame(buffersink_ctx, pOutputFrame); return av_buffersink_get_frame(buffersink_ctx, pOutputFrame);
} }
} // namespace torchaudio::io } // namespace torchaudio::io
#include <torchaudio/csrc/ffmpeg/hw_context.h> #include <torchaudio/csrc/ffmpeg/hw_context.h>
#include <torchaudio/csrc/ffmpeg/stub.h>
namespace torchaudio::io { namespace torchaudio::io {
namespace { namespace {
...@@ -16,7 +15,7 @@ AVBufferRef* get_cuda_context(int index) { ...@@ -16,7 +15,7 @@ AVBufferRef* get_cuda_context(int index) {
} }
if (CUDA_CONTEXT_CACHE.count(index) == 0) { if (CUDA_CONTEXT_CACHE.count(index) == 0) {
AVBufferRef* p = nullptr; AVBufferRef* p = nullptr;
int ret = FFMPEG av_hwdevice_ctx_create( int ret = av_hwdevice_ctx_create(
&p, AV_HWDEVICE_TYPE_CUDA, std::to_string(index).c_str(), nullptr, 0); &p, AV_HWDEVICE_TYPE_CUDA, std::to_string(index).c_str(), nullptr, 0);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, ret >= 0,
......
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
#include <torchaudio/csrc/ffmpeg/hw_context.h> #include <torchaudio/csrc/ffmpeg/hw_context.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/stream_reader.h> #include <torchaudio/csrc/ffmpeg/stream_reader/stream_reader.h>
#include <torchaudio/csrc/ffmpeg/stream_writer/stream_writer.h> #include <torchaudio/csrc/ffmpeg/stream_writer/stream_writer.h>
#include <torchaudio/csrc/ffmpeg/stub.h>
namespace torchaudio::io { namespace torchaudio::io {
namespace { namespace {
...@@ -12,7 +11,7 @@ std::map<std::string, std::tuple<int64_t, int64_t, int64_t>> get_versions() { ...@@ -12,7 +11,7 @@ std::map<std::string, std::tuple<int64_t, int64_t, int64_t>> get_versions() {
#define add_version(NAME) \ #define add_version(NAME) \
{ \ { \
int ver = FFMPEG NAME##_version(); \ int ver = NAME##_version(); \
ret.emplace( \ ret.emplace( \
"lib" #NAME, \ "lib" #NAME, \
std::make_tuple<>( \ std::make_tuple<>( \
...@@ -35,7 +34,7 @@ std::map<std::string, std::string> get_demuxers(bool req_device) { ...@@ -35,7 +34,7 @@ std::map<std::string, std::string> get_demuxers(bool req_device) {
std::map<std::string, std::string> ret; std::map<std::string, std::string> ret;
const AVInputFormat* fmt = nullptr; const AVInputFormat* fmt = nullptr;
void* i = nullptr; void* i = nullptr;
while ((fmt = FFMPEG av_demuxer_iterate(&i))) { while ((fmt = av_demuxer_iterate(&i))) {
assert(fmt); assert(fmt);
bool is_device = [&]() { bool is_device = [&]() {
const AVClass* avclass = fmt->priv_class; const AVClass* avclass = fmt->priv_class;
...@@ -52,7 +51,7 @@ std::map<std::string, std::string> get_muxers(bool req_device) { ...@@ -52,7 +51,7 @@ std::map<std::string, std::string> get_muxers(bool req_device) {
std::map<std::string, std::string> ret; std::map<std::string, std::string> ret;
const AVOutputFormat* fmt = nullptr; const AVOutputFormat* fmt = nullptr;
void* i = nullptr; void* i = nullptr;
while ((fmt = FFMPEG av_muxer_iterate(&i))) { while ((fmt = av_muxer_iterate(&i))) {
assert(fmt); assert(fmt);
bool is_device = [&]() { bool is_device = [&]() {
const AVClass* avclass = fmt->priv_class; const AVClass* avclass = fmt->priv_class;
...@@ -71,10 +70,10 @@ std::map<std::string, std::string> get_codecs( ...@@ -71,10 +70,10 @@ std::map<std::string, std::string> get_codecs(
const AVCodec* c = nullptr; const AVCodec* c = nullptr;
void* i = nullptr; void* i = nullptr;
std::map<std::string, std::string> ret; std::map<std::string, std::string> ret;
while ((c = FFMPEG av_codec_iterate(&i))) { while ((c = av_codec_iterate(&i))) {
assert(c); assert(c);
if ((req_encoder && FFMPEG av_codec_is_encoder(c)) || if ((req_encoder && av_codec_is_encoder(c)) ||
(!req_encoder && FFMPEG av_codec_is_decoder(c))) { (!req_encoder && av_codec_is_decoder(c))) {
if (c->type == type && c->name) { if (c->type == type && c->name) {
ret.emplace(c->name, c->long_name ? c->long_name : ""); ret.emplace(c->name, c->long_name ? c->long_name : "");
} }
...@@ -87,7 +86,7 @@ std::vector<std::string> get_protocols(bool output) { ...@@ -87,7 +86,7 @@ std::vector<std::string> get_protocols(bool output) {
void* opaque = nullptr; void* opaque = nullptr;
const char* name = nullptr; const char* name = nullptr;
std::vector<std::string> ret; std::vector<std::string> ret;
while ((name = FFMPEG avio_enum_protocols(&opaque, output))) { while ((name = avio_enum_protocols(&opaque, output))) {
assert(name); assert(name);
ret.emplace_back(name); ret.emplace_back(name);
} }
...@@ -95,7 +94,7 @@ std::vector<std::string> get_protocols(bool output) { ...@@ -95,7 +94,7 @@ std::vector<std::string> get_protocols(bool output) {
} }
std::string get_build_config() { std::string get_build_config() {
return FFMPEG avcodec_configuration(); return avcodec_configuration();
} }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
...@@ -188,9 +187,9 @@ struct StreamWriterFileObj : private FileObj, public StreamWriterCustomIO { ...@@ -188,9 +187,9 @@ struct StreamWriterFileObj : private FileObj, public StreamWriterCustomIO {
}; };
PYBIND11_MODULE(_torchaudio_ffmpeg, m) { PYBIND11_MODULE(_torchaudio_ffmpeg, m) {
m.def("init", []() { FFMPEG avdevice_register_all(); }); m.def("init", []() { avdevice_register_all(); });
m.def("get_log_level", []() { return FFMPEG av_log_get_level(); }); m.def("get_log_level", []() { return av_log_get_level(); });
m.def("set_log_level", [](int level) { FFMPEG av_log_set_level(level); }); m.def("set_log_level", [](int level) { av_log_set_level(level); });
m.def("get_versions", &get_versions); m.def("get_versions", &get_versions);
m.def("get_muxers", []() { return get_muxers(false); }); m.def("get_muxers", []() { return get_muxers(false); });
m.def("get_demuxers", []() { return get_demuxers(false); }); m.def("get_demuxers", []() { return get_demuxers(false); });
...@@ -246,22 +245,21 @@ PYBIND11_MODULE(_torchaudio_ffmpeg, m) { ...@@ -246,22 +245,21 @@ PYBIND11_MODULE(_torchaudio_ffmpeg, m) {
.def_property_readonly( .def_property_readonly(
"media_type", "media_type",
[](const OutputStreamInfo& o) -> std::string { [](const OutputStreamInfo& o) -> std::string {
return FFMPEG av_get_media_type_string(o.media_type); return av_get_media_type_string(o.media_type);
}) })
.def_property_readonly( .def_property_readonly(
"format", "format",
[](const OutputStreamInfo& o) -> std::string { [](const OutputStreamInfo& o) -> std::string {
switch (o.media_type) { switch (o.media_type) {
case AVMEDIA_TYPE_AUDIO: case AVMEDIA_TYPE_AUDIO:
return FFMPEG av_get_sample_fmt_name( return av_get_sample_fmt_name((AVSampleFormat)(o.format));
(AVSampleFormat)(o.format));
case AVMEDIA_TYPE_VIDEO: case AVMEDIA_TYPE_VIDEO:
return FFMPEG av_get_pix_fmt_name((AVPixelFormat)(o.format)); return av_get_pix_fmt_name((AVPixelFormat)(o.format));
default: default:
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
false, false,
"FilterGraph is returning unexpected media type: ", "FilterGraph is returning unexpected media type: ",
FFMPEG av_get_media_type_string(o.media_type)); av_get_media_type_string(o.media_type));
} }
}) })
.def_readonly("sample_rate", &OutputStreamInfo::sample_rate) .def_readonly("sample_rate", &OutputStreamInfo::sample_rate)
...@@ -285,7 +283,7 @@ PYBIND11_MODULE(_torchaudio_ffmpeg, m) { ...@@ -285,7 +283,7 @@ PYBIND11_MODULE(_torchaudio_ffmpeg, m) {
.def_property_readonly( .def_property_readonly(
"media_type", "media_type",
[](const SrcStreamInfo& s) { [](const SrcStreamInfo& s) {
return FFMPEG av_get_media_type_string(s.media_type); return av_get_media_type_string(s.media_type);
}) })
.def_readonly("codec_name", &SrcStreamInfo::codec_name) .def_readonly("codec_name", &SrcStreamInfo::codec_name)
.def_readonly("codec_long_name", &SrcStreamInfo::codec_long_name) .def_readonly("codec_long_name", &SrcStreamInfo::codec_long_name)
......
#include <torch/torch.h> #include <torch/torch.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/conversion.h> #include <torchaudio/csrc/ffmpeg/stream_reader/conversion.h>
#include <torchaudio/csrc/ffmpeg/stub.h>
#ifdef USE_CUDA #ifdef USE_CUDA
#include <c10/cuda/CUDAStream.h> #include <c10/cuda/CUDAStream.h>
...@@ -429,11 +428,11 @@ void NV12CudaConverter::convert(const AVFrame* src, torch::Tensor& dst) { ...@@ -429,11 +428,11 @@ void NV12CudaConverter::convert(const AVFrame* src, torch::Tensor& dst) {
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
AV_PIX_FMT_CUDA == fmt, AV_PIX_FMT_CUDA == fmt,
"Expected CUDA frame. Found: ", "Expected CUDA frame. Found: ",
FFMPEG av_get_pix_fmt_name(fmt)); av_get_pix_fmt_name(fmt));
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
AV_PIX_FMT_NV12 == sw_fmt, AV_PIX_FMT_NV12 == sw_fmt,
"Expected NV12 format. Found: ", "Expected NV12 format. Found: ",
FFMPEG av_get_pix_fmt_name(sw_fmt)); av_get_pix_fmt_name(sw_fmt));
// Write Y plane directly // Write Y plane directly
auto status = cudaMemcpy2D( auto status = cudaMemcpy2D(
...@@ -510,11 +509,11 @@ void P010CudaConverter::convert(const AVFrame* src, torch::Tensor& dst) { ...@@ -510,11 +509,11 @@ void P010CudaConverter::convert(const AVFrame* src, torch::Tensor& dst) {
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
AV_PIX_FMT_CUDA == fmt, AV_PIX_FMT_CUDA == fmt,
"Expected CUDA frame. Found: ", "Expected CUDA frame. Found: ",
FFMPEG av_get_pix_fmt_name(fmt)); av_get_pix_fmt_name(fmt));
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
AV_PIX_FMT_P010 == sw_fmt, AV_PIX_FMT_P010 == sw_fmt,
"Expected P010 format. Found: ", "Expected P010 format. Found: ",
FFMPEG av_get_pix_fmt_name(sw_fmt)); av_get_pix_fmt_name(sw_fmt));
// Write Y plane directly // Write Y plane directly
auto status = cudaMemcpy2D( auto status = cudaMemcpy2D(
...@@ -591,11 +590,11 @@ void YUV444PCudaConverter::convert(const AVFrame* src, torch::Tensor& dst) { ...@@ -591,11 +590,11 @@ void YUV444PCudaConverter::convert(const AVFrame* src, torch::Tensor& dst) {
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
AV_PIX_FMT_CUDA == fmt, AV_PIX_FMT_CUDA == fmt,
"Expected CUDA frame. Found: ", "Expected CUDA frame. Found: ",
FFMPEG av_get_pix_fmt_name(fmt)); av_get_pix_fmt_name(fmt));
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
AV_PIX_FMT_YUV444P == sw_fmt, AV_PIX_FMT_YUV444P == sw_fmt,
"Expected YUV444P format. Found: ", "Expected YUV444P format. Found: ",
FFMPEG av_get_pix_fmt_name(sw_fmt)); av_get_pix_fmt_name(sw_fmt));
// Write Y plane directly // Write Y plane directly
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
......
#include <torchaudio/csrc/ffmpeg/stream_reader/packet_buffer.h> #include <torchaudio/csrc/ffmpeg/stream_reader/packet_buffer.h>
#include <torchaudio/csrc/ffmpeg/stub.h>
namespace torchaudio::io { namespace torchaudio::io {
void PacketBuffer::push_packet(AVPacket* packet) { void PacketBuffer::push_packet(AVPacket* packet) {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(packet, "Packet is null."); TORCH_INTERNAL_ASSERT_DEBUG_ONLY(packet, "Packet is null.");
AVPacket* p = FFMPEG av_packet_clone(packet); AVPacket* p = av_packet_clone(packet);
TORCH_INTERNAL_ASSERT(p, "Failed to clone packet."); TORCH_INTERNAL_ASSERT(p, "Failed to clone packet.");
packets.emplace_back(p); packets.emplace_back(p);
} }
......
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
#include <torchaudio/csrc/ffmpeg/stream_reader/buffer/unchunked_buffer.h> #include <torchaudio/csrc/ffmpeg/stream_reader/buffer/unchunked_buffer.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/conversion.h> #include <torchaudio/csrc/ffmpeg/stream_reader/conversion.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/post_process.h> #include <torchaudio/csrc/ffmpeg/stream_reader/post_process.h>
#include <torchaudio/csrc/ffmpeg/stub.h>
namespace torchaudio::io { namespace torchaudio::io {
namespace detail { namespace detail {
...@@ -49,7 +48,7 @@ FilterGraphFactory get_video_factory( ...@@ -49,7 +48,7 @@ FilterGraphFactory get_video_factory(
f.add_video_sink(); f.add_video_sink();
f.add_process(filter_desc); f.add_process(filter_desc);
if (hw_frames_ctx) { if (hw_frames_ctx) {
f.create_filter(FFMPEG av_buffer_ref(hw_frames_ctx)); f.create_filter(av_buffer_ref(hw_frames_ctx));
} else { } else {
f.create_filter(); f.create_filter();
} }
...@@ -140,7 +139,7 @@ struct ProcessImpl : public IPostDecodeProcess { ...@@ -140,7 +139,7 @@ struct ProcessImpl : public IPostDecodeProcess {
if (ret >= 0) { if (ret >= 0) {
buffer.push_frame(converter.convert(frame), frame->pts); buffer.push_frame(converter.convert(frame), frame->pts);
} }
FFMPEG av_frame_unref(frame); av_frame_unref(frame);
} }
return ret; return ret;
} }
...@@ -160,7 +159,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_audio_process( ...@@ -160,7 +159,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_audio_process(
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
i.type == AVMEDIA_TYPE_AUDIO, i.type == AVMEDIA_TYPE_AUDIO,
"Unsupported media type found: ", "Unsupported media type found: ",
FFMPEG av_get_media_type_string(i.type)); av_get_media_type_string(i.type));
using B = UnchunkedBuffer; using B = UnchunkedBuffer;
...@@ -227,7 +226,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_audio_process( ...@@ -227,7 +226,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_audio_process(
} }
default: default:
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
false, "Unexpected audio type:", FFMPEG av_get_sample_fmt_name(fmt)); false, "Unexpected audio type:", av_get_sample_fmt_name(fmt));
} }
} }
...@@ -240,7 +239,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_audio_process( ...@@ -240,7 +239,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_audio_process(
TORCH_INTERNAL_ASSERT_DEBUG_ONLY( TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
i.type == AVMEDIA_TYPE_AUDIO, i.type == AVMEDIA_TYPE_AUDIO,
"Unsupported media type found: ", "Unsupported media type found: ",
FFMPEG av_get_media_type_string(i.type)); av_get_media_type_string(i.type));
using B = ChunkedBuffer; using B = ChunkedBuffer;
B buffer{i.time_base, frames_per_chunk, num_chunks}; B buffer{i.time_base, frames_per_chunk, num_chunks};
...@@ -308,7 +307,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_audio_process( ...@@ -308,7 +307,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_audio_process(
} }
default: default:
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
false, "Unexpected audio type:", FFMPEG av_get_sample_fmt_name(fmt)); false, "Unexpected audio type:", av_get_sample_fmt_name(fmt));
} }
} }
...@@ -322,7 +321,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_video_process( ...@@ -322,7 +321,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_video_process(
TORCH_INTERNAL_ASSERT_DEBUG_ONLY( TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
i.type == AVMEDIA_TYPE_VIDEO, i.type == AVMEDIA_TYPE_VIDEO,
"Unsupported media type found: ", "Unsupported media type found: ",
FFMPEG av_get_media_type_string(i.type)); av_get_media_type_string(i.type));
auto h = i.height; auto h = i.height;
auto w = i.width; auto w = i.width;
...@@ -376,9 +375,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_video_process( ...@@ -376,9 +375,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_video_process(
} }
default: { default: {
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
false, false, "Unexpected video format found: ", av_get_pix_fmt_name(fmt));
"Unexpected video format found: ",
FFMPEG av_get_pix_fmt_name(fmt));
} }
} }
} }
...@@ -396,7 +393,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_cuda_video_process( ...@@ -396,7 +393,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_cuda_video_process(
TORCH_INTERNAL_ASSERT_DEBUG_ONLY( TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
i.type == AVMEDIA_TYPE_VIDEO, i.type == AVMEDIA_TYPE_VIDEO,
"Unsupported media type found: ", "Unsupported media type found: ",
FFMPEG av_get_media_type_string(i.type)); av_get_media_type_string(i.type));
using B = UnchunkedBuffer; using B = UnchunkedBuffer;
switch (auto fmt = (AVPixelFormat)i.format; fmt) { switch (auto fmt = (AVPixelFormat)i.format; fmt) {
...@@ -419,13 +416,13 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_cuda_video_process( ...@@ -419,13 +416,13 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_cuda_video_process(
TORCH_CHECK( TORCH_CHECK(
false, false,
"Unsupported video format found in CUDA HW: ", "Unsupported video format found in CUDA HW: ",
FFMPEG av_get_pix_fmt_name(fmt)); av_get_pix_fmt_name(fmt));
} }
default: { default: {
TORCH_CHECK( TORCH_CHECK(
false, false,
"Unexpected video format found in CUDA HW: ", "Unexpected video format found in CUDA HW: ",
FFMPEG av_get_pix_fmt_name(fmt)); av_get_pix_fmt_name(fmt));
} }
} }
#endif #endif
...@@ -440,7 +437,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_video_process( ...@@ -440,7 +437,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_video_process(
TORCH_INTERNAL_ASSERT_DEBUG_ONLY( TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
i.type == AVMEDIA_TYPE_VIDEO, i.type == AVMEDIA_TYPE_VIDEO,
"Unsupported media type found: ", "Unsupported media type found: ",
FFMPEG av_get_media_type_string(i.type)); av_get_media_type_string(i.type));
auto h = i.height; auto h = i.height;
auto w = i.width; auto w = i.width;
...@@ -494,9 +491,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_video_process( ...@@ -494,9 +491,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_video_process(
} }
default: { default: {
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
false, false, "Unexpected video format found: ", av_get_pix_fmt_name(fmt));
"Unexpected video format found: ",
FFMPEG av_get_pix_fmt_name(fmt));
} }
} }
} }
...@@ -516,7 +511,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_cuda_video_process( ...@@ -516,7 +511,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_cuda_video_process(
TORCH_INTERNAL_ASSERT_DEBUG_ONLY( TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
i.type == AVMEDIA_TYPE_VIDEO, i.type == AVMEDIA_TYPE_VIDEO,
"Unsupported media type found: ", "Unsupported media type found: ",
FFMPEG av_get_media_type_string(i.type)); av_get_media_type_string(i.type));
using B = ChunkedBuffer; using B = ChunkedBuffer;
switch (auto fmt = (AVPixelFormat)i.format; fmt) { switch (auto fmt = (AVPixelFormat)i.format; fmt) {
...@@ -545,13 +540,13 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_cuda_video_process( ...@@ -545,13 +540,13 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_cuda_video_process(
TORCH_CHECK( TORCH_CHECK(
false, false,
"Unsupported video format found in CUDA HW: ", "Unsupported video format found in CUDA HW: ",
FFMPEG av_get_pix_fmt_name(fmt)); av_get_pix_fmt_name(fmt));
} }
default: { default: {
TORCH_CHECK( TORCH_CHECK(
false, false,
"Unexpected video format found in CUDA HW: ", "Unexpected video format found in CUDA HW: ",
FFMPEG av_get_pix_fmt_name(fmt)); av_get_pix_fmt_name(fmt));
} }
} }
#endif #endif
......
#include <torchaudio/csrc/ffmpeg/hw_context.h> #include <torchaudio/csrc/ffmpeg/hw_context.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/stream_processor.h> #include <torchaudio/csrc/ffmpeg/stream_reader/stream_processor.h>
#include <torchaudio/csrc/ffmpeg/stub.h>
#include <stdexcept> #include <stdexcept>
#include <string_view> #include <string_view>
namespace torchaudio::io { namespace torchaudio::io {
namespace { namespace {
AVCodecContextPtr alloc_codec_context( AVCodecContextPtr alloc_codec_context(
enum AVCodecID codec_id, enum AVCodecID codec_id,
...@@ -12,24 +12,24 @@ AVCodecContextPtr alloc_codec_context( ...@@ -12,24 +12,24 @@ AVCodecContextPtr alloc_codec_context(
const AVCodec* codec = [&]() { const AVCodec* codec = [&]() {
if (decoder_name) { if (decoder_name) {
const AVCodec* c = const AVCodec* c =
FFMPEG avcodec_find_decoder_by_name(decoder_name.value().c_str()); avcodec_find_decoder_by_name(decoder_name.value().c_str());
TORCH_CHECK(c, "Unsupported codec: ", decoder_name.value()); TORCH_CHECK(c, "Unsupported codec: ", decoder_name.value());
return c; return c;
} else { } else {
const AVCodec* c = FFMPEG avcodec_find_decoder(codec_id); const AVCodec* c = avcodec_find_decoder(codec_id);
TORCH_CHECK(c, "Unsupported codec: ", FFMPEG avcodec_get_name(codec_id)); TORCH_CHECK(c, "Unsupported codec: ", avcodec_get_name(codec_id));
return c; return c;
} }
}(); }();
AVCodecContext* codec_ctx = FFMPEG avcodec_alloc_context3(codec); AVCodecContext* codec_ctx = avcodec_alloc_context3(codec);
TORCH_CHECK(codec_ctx, "Failed to allocate CodecContext."); TORCH_CHECK(codec_ctx, "Failed to allocate CodecContext.");
return AVCodecContextPtr(codec_ctx); return AVCodecContextPtr(codec_ctx);
} }
const AVCodecHWConfig* get_cuda_config(const AVCodec* codec) { const AVCodecHWConfig* get_cuda_config(const AVCodec* codec) {
for (int i = 0;; ++i) { for (int i = 0;; ++i) {
const AVCodecHWConfig* config = FFMPEG avcodec_get_hw_config(codec, i); const AVCodecHWConfig* config = avcodec_get_hw_config(codec, i);
if (!config) { if (!config) {
break; break;
} }
...@@ -82,7 +82,7 @@ enum AVPixelFormat get_hw_format( ...@@ -82,7 +82,7 @@ enum AVPixelFormat get_hw_format(
} }
AVBufferRef* get_hw_frames_ctx(AVCodecContext* codec_ctx) { AVBufferRef* get_hw_frames_ctx(AVCodecContext* codec_ctx) {
AVBufferRef* p = FFMPEG av_hwframe_ctx_alloc(codec_ctx->hw_device_ctx); AVBufferRef* p = av_hwframe_ctx_alloc(codec_ctx->hw_device_ctx);
TORCH_CHECK( TORCH_CHECK(
p, p,
"Failed to allocate CUDA frame context from device context at ", "Failed to allocate CUDA frame context from device context at ",
...@@ -93,11 +93,11 @@ AVBufferRef* get_hw_frames_ctx(AVCodecContext* codec_ctx) { ...@@ -93,11 +93,11 @@ AVBufferRef* get_hw_frames_ctx(AVCodecContext* codec_ctx) {
frames_ctx->width = codec_ctx->width; frames_ctx->width = codec_ctx->width;
frames_ctx->height = codec_ctx->height; frames_ctx->height = codec_ctx->height;
frames_ctx->initial_pool_size = 5; frames_ctx->initial_pool_size = 5;
int ret = FFMPEG av_hwframe_ctx_init(p); int ret = av_hwframe_ctx_init(p);
if (ret >= 0) { if (ret >= 0) {
return p; return p;
} }
FFMPEG av_buffer_unref(&p); av_buffer_unref(&p);
TORCH_CHECK( TORCH_CHECK(
false, "Failed to initialize CUDA frame context: ", av_err2string(ret)); false, "Failed to initialize CUDA frame context: ", av_err2string(ret));
} }
...@@ -106,7 +106,7 @@ void configure_codec_context( ...@@ -106,7 +106,7 @@ void configure_codec_context(
AVCodecContext* codec_ctx, AVCodecContext* codec_ctx,
const AVCodecParameters* params, const AVCodecParameters* params,
const torch::Device& device) { const torch::Device& device) {
int ret = FFMPEG avcodec_parameters_to_context(codec_ctx, params); int ret = avcodec_parameters_to_context(codec_ctx, params);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, "Failed to set CodecContext parameter: ", av_err2string(ret)); ret >= 0, "Failed to set CodecContext parameter: ", av_err2string(ret));
...@@ -121,8 +121,7 @@ void configure_codec_context( ...@@ -121,8 +121,7 @@ void configure_codec_context(
// 2. Set pCodecContext->get_format call back function which // 2. Set pCodecContext->get_format call back function which
// will retrieve the HW pixel format from opaque pointer. // will retrieve the HW pixel format from opaque pointer.
codec_ctx->get_format = get_hw_format; codec_ctx->get_format = get_hw_format;
codec_ctx->hw_device_ctx = codec_ctx->hw_device_ctx = av_buffer_ref(get_cuda_context(device.index()));
FFMPEG av_buffer_ref(get_cuda_context(device.index()));
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
codec_ctx->hw_device_ctx, "Failed to reference HW device context."); codec_ctx->hw_device_ctx, "Failed to reference HW device context.");
#endif #endif
...@@ -135,16 +134,16 @@ void open_codec( ...@@ -135,16 +134,16 @@ void open_codec(
AVDictionary* opts = get_option_dict(decoder_option); AVDictionary* opts = get_option_dict(decoder_option);
// Default to single thread execution. // Default to single thread execution.
if (!FFMPEG av_dict_get(opts, "threads", nullptr, 0)) { if (!av_dict_get(opts, "threads", nullptr, 0)) {
FFMPEG av_dict_set(&opts, "threads", "1", 0); av_dict_set(&opts, "threads", "1", 0);
} }
if (!codec_ctx->channel_layout) { if (!codec_ctx->channel_layout) {
codec_ctx->channel_layout = codec_ctx->channel_layout =
FFMPEG av_get_default_channel_layout(codec_ctx->channels); av_get_default_channel_layout(codec_ctx->channels);
} }
int ret = FFMPEG avcodec_open2(codec_ctx, codec_ctx->codec, &opts); int ret = avcodec_open2(codec_ctx, codec_ctx->codec, &opts);
clean_up_dict(opts); clean_up_dict(opts);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, "Failed to initialize CodecContext: ", av_err2string(ret)); ret >= 0, "Failed to initialize CodecContext: ", av_err2string(ret));
...@@ -259,8 +258,8 @@ void StreamProcessor::remove_stream(KeyType key) { ...@@ -259,8 +258,8 @@ void StreamProcessor::remove_stream(KeyType key) {
void StreamProcessor::set_discard_timestamp(int64_t timestamp) { void StreamProcessor::set_discard_timestamp(int64_t timestamp) {
TORCH_CHECK(timestamp >= 0, "timestamp must be non-negative."); TORCH_CHECK(timestamp >= 0, "timestamp must be non-negative.");
discard_before_pts = FFMPEG av_rescale_q( discard_before_pts =
timestamp, FFMPEG av_get_time_base_q(), stream_time_base); av_rescale_q(timestamp, av_get_time_base_q(), stream_time_base);
} }
void StreamProcessor::set_decoder( void StreamProcessor::set_decoder(
...@@ -306,9 +305,9 @@ int StreamProcessor::process_packet(AVPacket* packet) { ...@@ -306,9 +305,9 @@ int StreamProcessor::process_packet(AVPacket* packet) {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY( TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
is_decoder_set(), is_decoder_set(),
"Decoder must have been set prior to calling this function."); "Decoder must have been set prior to calling this function.");
int ret = FFMPEG avcodec_send_packet(codec_ctx, packet); int ret = avcodec_send_packet(codec_ctx, packet);
while (ret >= 0) { while (ret >= 0) {
ret = FFMPEG avcodec_receive_frame(codec_ctx, frame); ret = avcodec_receive_frame(codec_ctx, frame);
// AVERROR(EAGAIN) means that new input data is required to return new // AVERROR(EAGAIN) means that new input data is required to return new
// output. // output.
if (ret == AVERROR(EAGAIN)) if (ret == AVERROR(EAGAIN))
...@@ -355,7 +354,7 @@ int StreamProcessor::process_packet(AVPacket* packet) { ...@@ -355,7 +354,7 @@ int StreamProcessor::process_packet(AVPacket* packet) {
} }
// else we can just unref the frame and continue // else we can just unref the frame and continue
FFMPEG av_frame_unref(frame); av_frame_unref(frame);
} }
return ret; return ret;
} }
...@@ -364,7 +363,7 @@ void StreamProcessor::flush() { ...@@ -364,7 +363,7 @@ void StreamProcessor::flush() {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY( TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
is_decoder_set(), is_decoder_set(),
"Decoder must have been set prior to calling this function."); "Decoder must have been set prior to calling this function.");
FFMPEG avcodec_flush_buffers(codec_ctx); avcodec_flush_buffers(codec_ctx);
for (auto& ite : post_processes) { for (auto& ite : post_processes) {
ite.second->flush(); ite.second->flush();
} }
......
#include <torchaudio/csrc/ffmpeg/ffmpeg.h> #include <torchaudio/csrc/ffmpeg/ffmpeg.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/stream_reader.h> #include <torchaudio/csrc/ffmpeg/stream_reader/stream_reader.h>
#include <torchaudio/csrc/ffmpeg/stub.h>
#include <chrono> #include <chrono>
#include <sstream> #include <sstream>
#include <stdexcept> #include <stdexcept>
#include <thread> #include <thread>
extern "C" {
#include <libavutil/rational.h>
}
namespace torchaudio::io { namespace torchaudio::io {
using KeyType = StreamProcessor::KeyType; using KeyType = StreamProcessor::KeyType;
...@@ -23,7 +18,7 @@ AVFormatContext* get_input_format_context( ...@@ -23,7 +18,7 @@ AVFormatContext* get_input_format_context(
const c10::optional<std::string>& format, const c10::optional<std::string>& format,
const c10::optional<OptionDict>& option, const c10::optional<OptionDict>& option,
AVIOContext* io_ctx) { AVIOContext* io_ctx) {
AVFormatContext* p = FFMPEG avformat_alloc_context(); AVFormatContext* p = avformat_alloc_context();
TORCH_CHECK(p, "Failed to allocate AVFormatContext."); TORCH_CHECK(p, "Failed to allocate AVFormatContext.");
if (io_ctx) { if (io_ctx) {
p->pb = io_ctx; p->pb = io_ctx;
...@@ -33,7 +28,7 @@ AVFormatContext* get_input_format_context( ...@@ -33,7 +28,7 @@ AVFormatContext* get_input_format_context(
if (format.has_value()) { if (format.has_value()) {
std::string format_str = format.value(); std::string format_str = format.value();
AVFORMAT_CONST AVInputFormat* pInput = AVFORMAT_CONST AVInputFormat* pInput =
FFMPEG av_find_input_format(format_str.c_str()); av_find_input_format(format_str.c_str());
TORCH_CHECK(pInput, "Unsupported device/format: \"", format_str, "\""); TORCH_CHECK(pInput, "Unsupported device/format: \"", format_str, "\"");
return pInput; return pInput;
} }
...@@ -41,7 +36,7 @@ AVFormatContext* get_input_format_context( ...@@ -41,7 +36,7 @@ AVFormatContext* get_input_format_context(
}(); }();
AVDictionary* opt = get_option_dict(option); AVDictionary* opt = get_option_dict(option);
int ret = FFMPEG avformat_open_input(&p, src.c_str(), pInputFormat, &opt); int ret = avformat_open_input(&p, src.c_str(), pInputFormat, &opt);
clean_up_dict(opt); clean_up_dict(opt);
TORCH_CHECK( TORCH_CHECK(
...@@ -57,7 +52,7 @@ AVFormatContext* get_input_format_context( ...@@ -57,7 +52,7 @@ AVFormatContext* get_input_format_context(
StreamReader::StreamReader(AVFormatContext* p) : format_ctx(p) { StreamReader::StreamReader(AVFormatContext* p) : format_ctx(p) {
C10_LOG_API_USAGE_ONCE("torchaudio.io.StreamReader"); C10_LOG_API_USAGE_ONCE("torchaudio.io.StreamReader");
int ret = FFMPEG avformat_find_stream_info(format_ctx, nullptr); int ret = avformat_find_stream_info(format_ctx, nullptr);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, "Failed to find stream information: ", av_err2string(ret)); ret >= 0, "Failed to find stream information: ", av_err2string(ret));
...@@ -114,7 +109,7 @@ void validate_src_stream_type( ...@@ -114,7 +109,7 @@ void validate_src_stream_type(
"Stream ", "Stream ",
i, i,
" is not ", " is not ",
FFMPEG av_get_media_type_string(type), av_get_media_type_string(type),
" stream."); " stream.");
} }
...@@ -129,7 +124,7 @@ namespace { ...@@ -129,7 +124,7 @@ namespace {
OptionDict parse_metadata(const AVDictionary* metadata) { OptionDict parse_metadata(const AVDictionary* metadata) {
AVDictionaryEntry* tag = nullptr; AVDictionaryEntry* tag = nullptr;
OptionDict ret; OptionDict ret;
while ((tag = FFMPEG av_dict_get(metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { while ((tag = av_dict_get(metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
ret.emplace(std::string(tag->key), std::string(tag->value)); ret.emplace(std::string(tag->key), std::string(tag->value));
} }
return ret; return ret;
...@@ -152,8 +147,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const { ...@@ -152,8 +147,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const {
ret.num_frames = stream->nb_frames; ret.num_frames = stream->nb_frames;
ret.bits_per_sample = codecpar->bits_per_raw_sample; ret.bits_per_sample = codecpar->bits_per_raw_sample;
ret.metadata = parse_metadata(stream->metadata); ret.metadata = parse_metadata(stream->metadata);
const AVCodecDescriptor* desc = const AVCodecDescriptor* desc = avcodec_descriptor_get(codecpar->codec_id);
FFMPEG avcodec_descriptor_get(codecpar->codec_id);
if (desc) { if (desc) {
ret.codec_name = desc->name; ret.codec_name = desc->name;
ret.codec_long_name = desc->long_name; ret.codec_long_name = desc->long_name;
...@@ -163,7 +157,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const { ...@@ -163,7 +157,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const {
case AVMEDIA_TYPE_AUDIO: { case AVMEDIA_TYPE_AUDIO: {
AVSampleFormat smp_fmt = static_cast<AVSampleFormat>(codecpar->format); AVSampleFormat smp_fmt = static_cast<AVSampleFormat>(codecpar->format);
if (smp_fmt != AV_SAMPLE_FMT_NONE) { if (smp_fmt != AV_SAMPLE_FMT_NONE) {
ret.fmt_name = FFMPEG av_get_sample_fmt_name(smp_fmt); ret.fmt_name = av_get_sample_fmt_name(smp_fmt);
} }
ret.sample_rate = static_cast<double>(codecpar->sample_rate); ret.sample_rate = static_cast<double>(codecpar->sample_rate);
ret.num_channels = codecpar->channels; ret.num_channels = codecpar->channels;
...@@ -172,7 +166,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const { ...@@ -172,7 +166,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const {
case AVMEDIA_TYPE_VIDEO: { case AVMEDIA_TYPE_VIDEO: {
AVPixelFormat pix_fmt = static_cast<AVPixelFormat>(codecpar->format); AVPixelFormat pix_fmt = static_cast<AVPixelFormat>(codecpar->format);
if (pix_fmt != AV_PIX_FMT_NONE) { if (pix_fmt != AV_PIX_FMT_NONE) {
ret.fmt_name = FFMPEG av_get_pix_fmt_name(pix_fmt); ret.fmt_name = av_get_pix_fmt_name(pix_fmt);
} }
ret.width = codecpar->width; ret.width = codecpar->width;
ret.height = codecpar->height; ret.height = codecpar->height;
...@@ -186,7 +180,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const { ...@@ -186,7 +180,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const {
namespace { namespace {
AVCodecParameters* get_codecpar() { AVCodecParameters* get_codecpar() {
AVCodecParameters* ptr = FFMPEG avcodec_parameters_alloc(); AVCodecParameters* ptr = avcodec_parameters_alloc();
TORCH_CHECK(ptr, "Failed to allocate resource."); TORCH_CHECK(ptr, "Failed to allocate resource.");
return ptr; return ptr;
} }
...@@ -197,7 +191,7 @@ StreamParams StreamReader::get_src_stream_params(int i) { ...@@ -197,7 +191,7 @@ StreamParams StreamReader::get_src_stream_params(int i) {
AVStream* stream = format_ctx->streams[i]; AVStream* stream = format_ctx->streams[i];
AVCodecParametersPtr codec_params(get_codecpar()); AVCodecParametersPtr codec_params(get_codecpar());
int ret = FFMPEG avcodec_parameters_copy(codec_params, stream->codecpar); int ret = avcodec_parameters_copy(codec_params, stream->codecpar);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, ret >= 0,
"Failed to copy the stream's codec parameters. (", "Failed to copy the stream's codec parameters. (",
...@@ -239,12 +233,12 @@ OutputStreamInfo StreamReader::get_out_stream_info(int i) const { ...@@ -239,12 +233,12 @@ OutputStreamInfo StreamReader::get_out_stream_info(int i) const {
} }
int64_t StreamReader::find_best_audio_stream() const { int64_t StreamReader::find_best_audio_stream() const {
return FFMPEG av_find_best_stream( return av_find_best_stream(
format_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0); format_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0);
} }
int64_t StreamReader::find_best_video_stream() const { int64_t StreamReader::find_best_video_stream() const {
return FFMPEG av_find_best_stream( return av_find_best_stream(
format_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0); format_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
} }
...@@ -294,7 +288,7 @@ void StreamReader::seek(double timestamp_s, int64_t mode) { ...@@ -294,7 +288,7 @@ void StreamReader::seek(double timestamp_s, int64_t mode) {
TORCH_CHECK(false, "Invalid mode value: ", mode); TORCH_CHECK(false, "Invalid mode value: ", mode);
} }
int ret = FFMPEG av_seek_frame(format_ctx, -1, timestamp_av_tb, flag); int ret = av_seek_frame(format_ctx, -1, timestamp_av_tb, flag);
if (ret < 0) { if (ret < 0) {
seek_timestamp = 0; seek_timestamp = 0;
...@@ -407,12 +401,12 @@ void StreamReader::add_stream( ...@@ -407,12 +401,12 @@ void StreamReader::add_stream(
case AVMEDIA_TYPE_AUDIO: case AVMEDIA_TYPE_AUDIO:
return AVRational{0, 1}; return AVRational{0, 1};
case AVMEDIA_TYPE_VIDEO: case AVMEDIA_TYPE_VIDEO:
return FFMPEG av_guess_frame_rate(format_ctx, stream, nullptr); return av_guess_frame_rate(format_ctx, stream, nullptr);
default: default:
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
false, false,
"Unexpected media type is given: ", "Unexpected media type is given: ",
FFMPEG av_get_media_type_string(media_type)); av_get_media_type_string(media_type));
} }
}(); }();
int key = processors[i]->add_stream( int key = processors[i]->add_stream(
...@@ -451,7 +445,7 @@ void StreamReader::remove_stream(int64_t i) { ...@@ -451,7 +445,7 @@ void StreamReader::remove_stream(int64_t i) {
// 1: It's done, caller should stop calling // 1: It's done, caller should stop calling
// <0: Some error happened // <0: Some error happened
int StreamReader::process_packet() { int StreamReader::process_packet() {
int ret = FFMPEG av_read_frame(format_ctx, packet); int ret = av_read_frame(format_ctx, packet);
if (ret == AVERROR_EOF) { if (ret == AVERROR_EOF) {
ret = drain(); ret = drain();
return (ret < 0) ? ret : 1; return (ret < 0) ? ret : 1;
...@@ -582,13 +576,12 @@ AVIOContext* get_io_context( ...@@ -582,13 +576,12 @@ AVIOContext* get_io_context(
int buffer_size, int buffer_size,
int (*read_packet)(void* opaque, uint8_t* buf, int buf_size), int (*read_packet)(void* opaque, uint8_t* buf, int buf_size),
int64_t (*seek)(void* opaque, int64_t offset, int whence)) { int64_t (*seek)(void* opaque, int64_t offset, int whence)) {
unsigned char* buffer = unsigned char* buffer = static_cast<unsigned char*>(av_malloc(buffer_size));
static_cast<unsigned char*>(FFMPEG av_malloc(buffer_size));
TORCH_CHECK(buffer, "Failed to allocate buffer."); TORCH_CHECK(buffer, "Failed to allocate buffer.");
AVIOContext* io_ctx = FFMPEG avio_alloc_context( AVIOContext* io_ctx = avio_alloc_context(
buffer, buffer_size, 0, opaque, read_packet, nullptr, seek); buffer, buffer_size, 0, opaque, read_packet, nullptr, seek);
if (!io_ctx) { if (!io_ctx) {
FFMPEG av_freep(&buffer); av_freep(&buffer);
TORCH_CHECK(false, "Failed to allocate AVIOContext."); TORCH_CHECK(false, "Failed to allocate AVIOContext.");
} }
return io_ctx; return io_ctx;
......
#include <torchaudio/csrc/ffmpeg/hw_context.h> #include <torchaudio/csrc/ffmpeg/hw_context.h>
#include <torchaudio/csrc/ffmpeg/stream_writer/encode_process.h> #include <torchaudio/csrc/ffmpeg/stream_writer/encode_process.h>
#include <torchaudio/csrc/ffmpeg/stub.h>
#include <cmath> #include <cmath>
extern "C" {
#include <libavutil/rational.h>
}
namespace torchaudio::io { namespace torchaudio::io {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
...@@ -61,7 +56,7 @@ void EncodeProcess::process_frame(AVFrame* src) { ...@@ -61,7 +56,7 @@ void EncodeProcess::process_frame(AVFrame* src) {
if (ret >= 0) { if (ret >= 0) {
encoder.encode(dst_frame); encoder.encode(dst_frame);
} }
FFMPEG av_frame_unref(dst_frame); av_frame_unref(dst_frame);
} }
} }
...@@ -76,8 +71,8 @@ void EncodeProcess::flush() { ...@@ -76,8 +71,8 @@ void EncodeProcess::flush() {
namespace { namespace {
enum AVSampleFormat get_src_sample_fmt(const std::string& src) { enum AVSampleFormat get_src_sample_fmt(const std::string& src) {
auto fmt = FFMPEG av_get_sample_fmt(src.c_str()); auto fmt = av_get_sample_fmt(src.c_str());
if (fmt != AV_SAMPLE_FMT_NONE && !FFMPEG av_sample_fmt_is_planar(fmt)) { if (fmt != AV_SAMPLE_FMT_NONE && !av_sample_fmt_is_planar(fmt)) {
return fmt; return fmt;
} }
TORCH_CHECK( TORCH_CHECK(
...@@ -94,7 +89,7 @@ enum AVSampleFormat get_src_sample_fmt(const std::string& src) { ...@@ -94,7 +89,7 @@ enum AVSampleFormat get_src_sample_fmt(const std::string& src) {
AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S64,
AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLT,
AV_SAMPLE_FMT_DBL}) { AV_SAMPLE_FMT_DBL}) {
ret.emplace_back(FFMPEG av_get_sample_fmt_name(fmt)); ret.emplace_back(av_get_sample_fmt_name(fmt));
} }
return c10::Join(", ", ret); return c10::Join(", ", ret);
}(), }(),
...@@ -102,7 +97,7 @@ enum AVSampleFormat get_src_sample_fmt(const std::string& src) { ...@@ -102,7 +97,7 @@ enum AVSampleFormat get_src_sample_fmt(const std::string& src) {
} }
enum AVPixelFormat get_src_pix_fmt(const std::string& src) { enum AVPixelFormat get_src_pix_fmt(const std::string& src) {
AVPixelFormat fmt = FFMPEG av_get_pix_fmt(src.c_str()); AVPixelFormat fmt = av_get_pix_fmt(src.c_str());
switch (fmt) { switch (fmt) {
case AV_PIX_FMT_GRAY8: case AV_PIX_FMT_GRAY8:
case AV_PIX_FMT_RGB24: case AV_PIX_FMT_RGB24:
...@@ -123,7 +118,7 @@ enum AVPixelFormat get_src_pix_fmt(const std::string& src) { ...@@ -123,7 +118,7 @@ enum AVPixelFormat get_src_pix_fmt(const std::string& src) {
AV_PIX_FMT_RGB24, AV_PIX_FMT_RGB24,
AV_PIX_FMT_BGR24, AV_PIX_FMT_BGR24,
AV_PIX_FMT_YUV444P}) { AV_PIX_FMT_YUV444P}) {
ret.emplace_back(FFMPEG av_get_pix_fmt_name(fmt)); ret.emplace_back(av_get_pix_fmt_name(fmt));
} }
return c10::Join(", ", ret); return c10::Join(", ", ret);
}(), }(),
...@@ -137,21 +132,18 @@ const AVCodec* get_codec( ...@@ -137,21 +132,18 @@ const AVCodec* get_codec(
AVCodecID default_codec, AVCodecID default_codec,
const c10::optional<std::string>& encoder) { const c10::optional<std::string>& encoder) {
if (encoder) { if (encoder) {
const AVCodec* c = const AVCodec* c = avcodec_find_encoder_by_name(encoder.value().c_str());
FFMPEG avcodec_find_encoder_by_name(encoder.value().c_str());
TORCH_CHECK(c, "Unexpected codec: ", encoder.value()); TORCH_CHECK(c, "Unexpected codec: ", encoder.value());
return c; return c;
} }
const AVCodec* c = FFMPEG avcodec_find_encoder(default_codec); const AVCodec* c = avcodec_find_encoder(default_codec);
TORCH_CHECK( TORCH_CHECK(
c, c, "Encoder not found for codec: ", avcodec_get_name(default_codec));
"Encoder not found for codec: ",
FFMPEG avcodec_get_name(default_codec));
return c; return c;
} }
AVCodecContextPtr get_codec_ctx(const AVCodec* codec, int flags) { AVCodecContextPtr get_codec_ctx(const AVCodec* codec, int flags) {
AVCodecContext* ctx = FFMPEG avcodec_alloc_context3(codec); AVCodecContext* ctx = avcodec_alloc_context3(codec);
TORCH_CHECK(ctx, "Failed to allocate CodecContext."); TORCH_CHECK(ctx, "Failed to allocate CodecContext.");
if (flags & AVFMT_GLOBALHEADER) { if (flags & AVFMT_GLOBALHEADER) {
...@@ -177,25 +169,25 @@ void open_codec( ...@@ -177,25 +169,25 @@ void open_codec(
// while "libopus" refers to the one depends on libopusenc // while "libopus" refers to the one depends on libopusenc
// https://ffmpeg.org/doxygen/4.1/libopusenc_8c.html#aa1d649e48cd2ec00cfe181cf9d0f3251 // https://ffmpeg.org/doxygen/4.1/libopusenc_8c.html#aa1d649e48cd2ec00cfe181cf9d0f3251
if (std::strcmp(codec_ctx->codec->name, "vorbis") == 0) { if (std::strcmp(codec_ctx->codec->name, "vorbis") == 0) {
if (!FFMPEG av_dict_get(opt, "strict", nullptr, 0)) { if (!av_dict_get(opt, "strict", nullptr, 0)) {
TORCH_WARN_ONCE( TORCH_WARN_ONCE(
"\"vorbis\" encoder is selected. Enabling '-strict experimental'. ", "\"vorbis\" encoder is selected. Enabling '-strict experimental'. ",
"If this is not desired, please provide \"strict\" encoder option ", "If this is not desired, please provide \"strict\" encoder option ",
"with desired value."); "with desired value.");
FFMPEG av_dict_set(&opt, "strict", "experimental", 0); av_dict_set(&opt, "strict", "experimental", 0);
} }
} }
if (std::strcmp(codec_ctx->codec->name, "opus") == 0) { if (std::strcmp(codec_ctx->codec->name, "opus") == 0) {
if (!FFMPEG av_dict_get(opt, "strict", nullptr, 0)) { if (!av_dict_get(opt, "strict", nullptr, 0)) {
TORCH_WARN_ONCE( TORCH_WARN_ONCE(
"\"opus\" encoder is selected. Enabling '-strict experimental'. ", "\"opus\" encoder is selected. Enabling '-strict experimental'. ",
"If this is not desired, please provide \"strict\" encoder option ", "If this is not desired, please provide \"strict\" encoder option ",
"with desired value."); "with desired value.");
FFMPEG av_dict_set(&opt, "strict", "experimental", 0); av_dict_set(&opt, "strict", "experimental", 0);
} }
} }
int ret = FFMPEG avcodec_open2(codec_ctx, codec_ctx->codec, &opt); int ret = avcodec_open2(codec_ctx, codec_ctx->codec, &opt);
clean_up_dict(opt); clean_up_dict(opt);
TORCH_CHECK(ret >= 0, "Failed to open codec: (", av_err2string(ret), ")"); TORCH_CHECK(ret >= 0, "Failed to open codec: (", av_err2string(ret), ")");
} }
...@@ -222,7 +214,7 @@ bool supported_sample_fmt( ...@@ -222,7 +214,7 @@ bool supported_sample_fmt(
std::string get_supported_formats(const AVSampleFormat* sample_fmts) { std::string get_supported_formats(const AVSampleFormat* sample_fmts) {
std::vector<std::string> ret; std::vector<std::string> ret;
while (*sample_fmts != AV_SAMPLE_FMT_NONE) { while (*sample_fmts != AV_SAMPLE_FMT_NONE) {
ret.emplace_back(FFMPEG av_get_sample_fmt_name(*sample_fmts)); ret.emplace_back(av_get_sample_fmt_name(*sample_fmts));
++sample_fmts; ++sample_fmts;
} }
return c10::Join(", ", ret); return c10::Join(", ", ret);
...@@ -234,7 +226,7 @@ AVSampleFormat get_enc_fmt( ...@@ -234,7 +226,7 @@ AVSampleFormat get_enc_fmt(
const AVCodec* codec) { const AVCodec* codec) {
if (encoder_format) { if (encoder_format) {
auto& enc_fmt_val = encoder_format.value(); auto& enc_fmt_val = encoder_format.value();
auto fmt = FFMPEG av_get_sample_fmt(enc_fmt_val.c_str()); auto fmt = av_get_sample_fmt(enc_fmt_val.c_str());
TORCH_CHECK( TORCH_CHECK(
fmt != AV_SAMPLE_FMT_NONE, "Unknown sample format: ", enc_fmt_val); fmt != AV_SAMPLE_FMT_NONE, "Unknown sample format: ", enc_fmt_val);
TORCH_CHECK( TORCH_CHECK(
...@@ -321,8 +313,8 @@ std::string get_supported_channels(const uint64_t* channel_layouts) { ...@@ -321,8 +313,8 @@ std::string get_supported_channels(const uint64_t* channel_layouts) {
std::vector<std::string> names; std::vector<std::string> names;
while (*channel_layouts) { while (*channel_layouts) {
std::stringstream ss; std::stringstream ss;
ss << FFMPEG av_get_channel_layout_nb_channels(*channel_layouts); ss << av_get_channel_layout_nb_channels(*channel_layouts);
ss << " (" << FFMPEG av_get_channel_name(*channel_layouts) << ")"; ss << " (" << av_get_channel_name(*channel_layouts) << ")";
names.emplace_back(ss.str()); names.emplace_back(ss.str());
++channel_layouts; ++channel_layouts;
} }
...@@ -339,10 +331,10 @@ uint64_t get_channel_layout( ...@@ -339,10 +331,10 @@ uint64_t get_channel_layout(
TORCH_CHECK( TORCH_CHECK(
val > 0, "The number of channels must be greater than 0. Found: ", val); val > 0, "The number of channels must be greater than 0. Found: ", val);
if (!codec->channel_layouts) { if (!codec->channel_layouts) {
return static_cast<uint64_t>(FFMPEG av_get_default_channel_layout(val)); return static_cast<uint64_t>(av_get_default_channel_layout(val));
} }
for (const uint64_t* it = codec->channel_layouts; *it; ++it) { for (const uint64_t* it = codec->channel_layouts; *it; ++it) {
if (FFMPEG av_get_channel_layout_nb_channels(*it) == val) { if (av_get_channel_layout_nb_channels(*it) == val) {
return *it; return *it;
} }
} }
...@@ -379,9 +371,8 @@ void configure_audio_codec_ctx( ...@@ -379,9 +371,8 @@ void configure_audio_codec_ctx(
const c10::optional<CodecConfig>& codec_config) { const c10::optional<CodecConfig>& codec_config) {
codec_ctx->sample_fmt = format; codec_ctx->sample_fmt = format;
codec_ctx->sample_rate = sample_rate; codec_ctx->sample_rate = sample_rate;
codec_ctx->time_base = av_inv_q(FFMPEG av_d2q(sample_rate, 1 << 24)); codec_ctx->time_base = av_inv_q(av_d2q(sample_rate, 1 << 24));
codec_ctx->channels = codec_ctx->channels = av_get_channel_layout_nb_channels(channel_layout);
FFMPEG av_get_channel_layout_nb_channels(channel_layout);
codec_ctx->channel_layout = channel_layout; codec_ctx->channel_layout = channel_layout;
// Set optional stuff // Set optional stuff
...@@ -420,7 +411,7 @@ bool supported_pix_fmt(const AVPixelFormat fmt, const AVPixelFormat* pix_fmts) { ...@@ -420,7 +411,7 @@ bool supported_pix_fmt(const AVPixelFormat fmt, const AVPixelFormat* pix_fmts) {
std::string get_supported_formats(const AVPixelFormat* pix_fmts) { std::string get_supported_formats(const AVPixelFormat* pix_fmts) {
std::vector<std::string> ret; std::vector<std::string> ret;
while (*pix_fmts != AV_PIX_FMT_NONE) { while (*pix_fmts != AV_PIX_FMT_NONE) {
ret.emplace_back(FFMPEG av_get_pix_fmt_name(*pix_fmts)); ret.emplace_back(av_get_pix_fmt_name(*pix_fmts));
++pix_fmts; ++pix_fmts;
} }
return c10::Join(", ", ret); return c10::Join(", ", ret);
...@@ -432,7 +423,7 @@ AVPixelFormat get_enc_fmt( ...@@ -432,7 +423,7 @@ AVPixelFormat get_enc_fmt(
const AVCodec* codec) { const AVCodec* codec) {
if (encoder_format) { if (encoder_format) {
const auto& val = encoder_format.value(); const auto& val = encoder_format.value();
auto fmt = FFMPEG av_get_pix_fmt(val.c_str()); auto fmt = av_get_pix_fmt(val.c_str());
TORCH_CHECK( TORCH_CHECK(
supported_pix_fmt(fmt, codec->pix_fmts), supported_pix_fmt(fmt, codec->pix_fmts),
codec->name, codec->name,
...@@ -470,7 +461,7 @@ AVRational get_enc_rate( ...@@ -470,7 +461,7 @@ AVRational get_enc_rate(
std::isfinite(enc_rate) && enc_rate > 0, std::isfinite(enc_rate) && enc_rate > 0,
"Encoder sample rate must be positive and fininte. Found: ", "Encoder sample rate must be positive and fininte. Found: ",
enc_rate); enc_rate);
AVRational rate = FFMPEG av_d2q(enc_rate, 1 << 24); AVRational rate = av_d2q(enc_rate, 1 << 24);
TORCH_CHECK( TORCH_CHECK(
supported_frame_rate(rate, codec->supported_framerates), supported_frame_rate(rate, codec->supported_framerates),
codec->name, codec->name,
...@@ -554,14 +545,14 @@ void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) { ...@@ -554,14 +545,14 @@ void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) {
// context to AVCodecContext. But this way, it will be deallocated // context to AVCodecContext. But this way, it will be deallocated
// automatically at the time AVCodecContext is freed, so we do that. // automatically at the time AVCodecContext is freed, so we do that.
ctx->hw_device_ctx = FFMPEG av_buffer_ref(get_cuda_context(device.index())); ctx->hw_device_ctx = av_buffer_ref(get_cuda_context(device.index()));
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
ctx->hw_device_ctx, "Failed to reference HW device context."); ctx->hw_device_ctx, "Failed to reference HW device context.");
ctx->sw_pix_fmt = ctx->pix_fmt; ctx->sw_pix_fmt = ctx->pix_fmt;
ctx->pix_fmt = AV_PIX_FMT_CUDA; ctx->pix_fmt = AV_PIX_FMT_CUDA;
ctx->hw_frames_ctx = FFMPEG av_hwframe_ctx_alloc(ctx->hw_device_ctx); ctx->hw_frames_ctx = av_hwframe_ctx_alloc(ctx->hw_device_ctx);
TORCH_CHECK(ctx->hw_frames_ctx, "Failed to create CUDA frame context."); TORCH_CHECK(ctx->hw_frames_ctx, "Failed to create CUDA frame context.");
auto frames_ctx = (AVHWFramesContext*)(ctx->hw_frames_ctx->data); auto frames_ctx = (AVHWFramesContext*)(ctx->hw_frames_ctx->data);
...@@ -571,7 +562,7 @@ void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) { ...@@ -571,7 +562,7 @@ void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) {
frames_ctx->height = ctx->height; frames_ctx->height = ctx->height;
frames_ctx->initial_pool_size = 5; frames_ctx->initial_pool_size = 5;
int ret = FFMPEG av_hwframe_ctx_init(ctx->hw_frames_ctx); int ret = av_hwframe_ctx_init(ctx->hw_frames_ctx);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, ret >= 0,
"Failed to initialize CUDA frame context: ", "Failed to initialize CUDA frame context: ",
...@@ -583,11 +574,11 @@ void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) { ...@@ -583,11 +574,11 @@ void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
AVStream* get_stream(AVFormatContext* format_ctx, AVCodecContext* codec_ctx) { AVStream* get_stream(AVFormatContext* format_ctx, AVCodecContext* codec_ctx) {
AVStream* stream = FFMPEG avformat_new_stream(format_ctx, nullptr); AVStream* stream = avformat_new_stream(format_ctx, nullptr);
TORCH_CHECK(stream, "Failed to allocate stream."); TORCH_CHECK(stream, "Failed to allocate stream.");
stream->time_base = codec_ctx->time_base; stream->time_base = codec_ctx->time_base;
int ret = FFMPEG avcodec_parameters_from_context(stream->codecpar, codec_ctx); int ret = avcodec_parameters_from_context(stream->codecpar, codec_ctx);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, "Failed to copy the stream parameter: ", av_err2string(ret)); ret >= 0, "Failed to copy the stream parameter: ", av_err2string(ret));
return stream; return stream;
...@@ -614,7 +605,7 @@ FilterGraph get_audio_filter_graph( ...@@ -614,7 +605,7 @@ FilterGraph get_audio_filter_graph(
if (filter_desc || src_fmt != enc_fmt || if (filter_desc || src_fmt != enc_fmt ||
src_sample_rate != enc_sample_rate || src_ch_layout != enc_ch_layout) { src_sample_rate != enc_sample_rate || src_ch_layout != enc_ch_layout) {
std::stringstream ss; std::stringstream ss;
ss << "aformat=sample_fmts=" << FFMPEG av_get_sample_fmt_name(enc_fmt) ss << "aformat=sample_fmts=" << av_get_sample_fmt_name(enc_fmt)
<< ":sample_rates=" << enc_sample_rate << ":channel_layouts=0x" << ":sample_rates=" << enc_sample_rate << ":channel_layouts=0x"
<< std::hex << enc_ch_layout; << std::hex << enc_ch_layout;
parts.push_back(ss.str()); parts.push_back(ss.str());
...@@ -665,7 +656,7 @@ FilterGraph get_video_filter_graph( ...@@ -665,7 +656,7 @@ FilterGraph get_video_filter_graph(
} }
if (filter_desc || src_fmt != enc_fmt) { if (filter_desc || src_fmt != enc_fmt) {
std::stringstream ss; std::stringstream ss;
ss << "format=" << FFMPEG av_get_pix_fmt_name(enc_fmt); ss << "format=" << av_get_pix_fmt_name(enc_fmt);
parts.emplace_back(ss.str()); parts.emplace_back(ss.str());
} }
if (filter_desc || if (filter_desc ||
...@@ -709,7 +700,7 @@ AVFramePtr get_audio_frame( ...@@ -709,7 +700,7 @@ AVFramePtr get_audio_frame(
frame->channel_layout = channel_layout; frame->channel_layout = channel_layout;
frame->sample_rate = sample_rate; frame->sample_rate = sample_rate;
frame->nb_samples = nb_samples; frame->nb_samples = nb_samples;
int ret = FFMPEG av_frame_get_buffer(frame, 0); int ret = av_frame_get_buffer(frame, 0);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, "Error allocating the source audio frame:", av_err2string(ret)); ret >= 0, "Error allocating the source audio frame:", av_err2string(ret));
...@@ -725,7 +716,7 @@ AVFramePtr get_video_frame(AVPixelFormat src_fmt, int width, int height) { ...@@ -725,7 +716,7 @@ AVFramePtr get_video_frame(AVPixelFormat src_fmt, int width, int height) {
frame->format = src_fmt; frame->format = src_fmt;
frame->width = width; frame->width = width;
frame->height = height; frame->height = height;
int ret = FFMPEG av_frame_get_buffer(frame, 0); int ret = av_frame_get_buffer(frame, 0);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, "Error allocating a video buffer :", av_err2string(ret)); ret >= 0, "Error allocating a video buffer :", av_err2string(ret));
...@@ -770,10 +761,10 @@ EncodeProcess get_audio_encode_process( ...@@ -770,10 +761,10 @@ EncodeProcess get_audio_encode_process(
// case, restrictions on the format to support tensor inputs do not apply, and // case, restrictions on the format to support tensor inputs do not apply, and
// so we directly get the format via FFmpeg. // so we directly get the format via FFmpeg.
const AVSampleFormat src_fmt = (disable_converter) const AVSampleFormat src_fmt = (disable_converter)
? FFMPEG av_get_sample_fmt(format.c_str()) ? av_get_sample_fmt(format.c_str())
: get_src_sample_fmt(format); : get_src_sample_fmt(format);
const auto src_ch_layout = static_cast<uint64_t>( const auto src_ch_layout =
FFMPEG av_get_default_channel_layout(src_num_channels)); static_cast<uint64_t>(av_get_default_channel_layout(src_num_channels));
// 2. Fetch codec from default or override // 2. Fetch codec from default or override
TORCH_CHECK( TORCH_CHECK(
...@@ -793,7 +784,7 @@ EncodeProcess get_audio_encode_process( ...@@ -793,7 +784,7 @@ EncodeProcess get_audio_encode_process(
// https://github.com/FFmpeg/FFmpeg/blob/0684e58886881a998f1a7b510d73600ff1df2b90/libavcodec/vorbisenc.c#L1277 // https://github.com/FFmpeg/FFmpeg/blob/0684e58886881a998f1a7b510d73600ff1df2b90/libavcodec/vorbisenc.c#L1277
// This is the case for at least until FFmpeg 6.0, so it will be // This is the case for at least until FFmpeg 6.0, so it will be
// like this for a while. // like this for a while.
return static_cast<uint64_t>(FFMPEG av_get_default_channel_layout(2)); return static_cast<uint64_t>(av_get_default_channel_layout(2));
} }
return get_channel_layout(src_ch_layout, encoder_num_channels, codec); return get_channel_layout(src_ch_layout, encoder_num_channels, codec);
}(); }();
...@@ -881,9 +872,9 @@ EncodeProcess get_video_encode_process( ...@@ -881,9 +872,9 @@ EncodeProcess get_video_encode_process(
// case, restrictions on the format to support tensor inputs do not apply, and // case, restrictions on the format to support tensor inputs do not apply, and
// so we directly get the format via FFmpeg. // so we directly get the format via FFmpeg.
const AVPixelFormat src_fmt = (disable_converter) const AVPixelFormat src_fmt = (disable_converter)
? FFMPEG av_get_pix_fmt(format.c_str()) ? av_get_pix_fmt(format.c_str())
: get_src_pix_fmt(format); : get_src_pix_fmt(format);
const AVRational src_rate = FFMPEG av_d2q(frame_rate, 1 << 24); const AVRational src_rate = av_d2q(frame_rate, 1 << 24);
// 2. Fetch codec from default or override // 2. Fetch codec from default or override
TORCH_CHECK( TORCH_CHECK(
...@@ -950,8 +941,7 @@ EncodeProcess get_video_encode_process( ...@@ -950,8 +941,7 @@ EncodeProcess get_video_encode_process(
AVFramePtr src_frame = [&]() { AVFramePtr src_frame = [&]() {
if (codec_ctx->hw_frames_ctx) { if (codec_ctx->hw_frames_ctx) {
AVFramePtr frame{alloc_avframe()}; AVFramePtr frame{alloc_avframe()};
int ret = int ret = av_hwframe_get_buffer(codec_ctx->hw_frames_ctx, frame, 0);
FFMPEG av_hwframe_get_buffer(codec_ctx->hw_frames_ctx, frame, 0);
TORCH_CHECK(ret >= 0, "Failed to fetch CUDA frame: ", av_err2string(ret)); TORCH_CHECK(ret >= 0, "Failed to fetch CUDA frame: ", av_err2string(ret));
frame->nb_samples = 1; frame->nb_samples = 1;
frame->pts = 0; frame->pts = 0;
......
#include <torchaudio/csrc/ffmpeg/stream_writer/encoder.h> #include <torchaudio/csrc/ffmpeg/stream_writer/encoder.h>
#include <torchaudio/csrc/ffmpeg/stub.h>
namespace torchaudio::io { namespace torchaudio::io {
...@@ -14,10 +13,10 @@ Encoder::Encoder( ...@@ -14,10 +13,10 @@ Encoder::Encoder(
/// ///
/// @param frame Frame data to encode /// @param frame Frame data to encode
void Encoder::encode(AVFrame* frame) { void Encoder::encode(AVFrame* frame) {
int ret = FFMPEG avcodec_send_frame(codec_ctx, frame); int ret = avcodec_send_frame(codec_ctx, frame);
TORCH_CHECK(ret >= 0, "Failed to encode frame (", av_err2string(ret), ")."); TORCH_CHECK(ret >= 0, "Failed to encode frame (", av_err2string(ret), ").");
while (ret >= 0) { while (ret >= 0) {
ret = FFMPEG avcodec_receive_packet(codec_ctx, packet); ret = avcodec_receive_packet(codec_ctx, packet);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
if (ret == AVERROR_EOF) { if (ret == AVERROR_EOF) {
// Note: // Note:
...@@ -32,7 +31,7 @@ void Encoder::encode(AVFrame* frame) { ...@@ -32,7 +31,7 @@ void Encoder::encode(AVFrame* frame) {
// An alternative is to use `av_write_frame` functoin, but in that case // An alternative is to use `av_write_frame` functoin, but in that case
// client code is responsible for ordering packets, which makes it // client code is responsible for ordering packets, which makes it
// complicated to use StreamWriter // complicated to use StreamWriter
ret = FFMPEG av_interleaved_write_frame(format_ctx, nullptr); ret = av_interleaved_write_frame(format_ctx, nullptr);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, "Failed to flush packet (", av_err2string(ret), ")."); ret >= 0, "Failed to flush packet (", av_err2string(ret), ").");
} }
...@@ -52,11 +51,10 @@ void Encoder::encode(AVFrame* frame) { ...@@ -52,11 +51,10 @@ void Encoder::encode(AVFrame* frame) {
// This has to be set before av_packet_rescale_ts bellow. // This has to be set before av_packet_rescale_ts bellow.
packet->duration = 1; packet->duration = 1;
} }
FFMPEG av_packet_rescale_ts( av_packet_rescale_ts(packet, codec_ctx->time_base, stream->time_base);
packet, codec_ctx->time_base, stream->time_base);
packet->stream_index = stream->index; packet->stream_index = stream->index;
ret = FFMPEG av_interleaved_write_frame(format_ctx, packet); ret = av_interleaved_write_frame(format_ctx, packet);
TORCH_CHECK(ret >= 0, "Failed to write packet (", av_err2string(ret), ")."); TORCH_CHECK(ret >= 0, "Failed to write packet (", av_err2string(ret), ").");
} }
} }
......
#include <torchaudio/csrc/ffmpeg/stream_writer/packet_writer.h> #include <torchaudio/csrc/ffmpeg/stream_writer/packet_writer.h>
#include <torchaudio/csrc/ffmpeg/stub.h>
namespace torchaudio::io { namespace torchaudio::io {
namespace { namespace {
AVStream* add_stream( AVStream* add_stream(
AVFormatContext* format_ctx, AVFormatContext* format_ctx,
const StreamParams& stream_params) { const StreamParams& stream_params) {
AVStream* stream = FFMPEG avformat_new_stream(format_ctx, nullptr); AVStream* stream = avformat_new_stream(format_ctx, nullptr);
int ret = FFMPEG avcodec_parameters_copy( int ret =
stream->codecpar, stream_params.codec_params); avcodec_parameters_copy(stream->codecpar, stream_params.codec_params);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, ret >= 0,
"Failed to copy the stream's codec parameters. (", "Failed to copy the stream's codec parameters. (",
...@@ -27,12 +26,11 @@ PacketWriter::PacketWriter( ...@@ -27,12 +26,11 @@ PacketWriter::PacketWriter(
void PacketWriter::write_packet(const AVPacketPtr& packet) { void PacketWriter::write_packet(const AVPacketPtr& packet) {
AVPacket dst_packet; AVPacket dst_packet;
int ret = FFMPEG av_packet_ref(&dst_packet, packet); int ret = av_packet_ref(&dst_packet, packet);
TORCH_CHECK(ret >= 0, "Failed to copy packet."); TORCH_CHECK(ret >= 0, "Failed to copy packet.");
FFMPEG av_packet_rescale_ts( av_packet_rescale_ts(&dst_packet, original_time_base, stream->time_base);
&dst_packet, original_time_base, stream->time_base);
dst_packet.stream_index = stream->index; dst_packet.stream_index = stream->index;
ret = FFMPEG av_interleaved_write_frame(format_ctx, &dst_packet); ret = av_interleaved_write_frame(format_ctx, &dst_packet);
TORCH_CHECK(ret >= 0, "Failed to write packet to destination."); TORCH_CHECK(ret >= 0, "Failed to write packet to destination.");
} }
} // namespace torchaudio::io } // namespace torchaudio::io
#include <torchaudio/csrc/ffmpeg/stream_writer/stream_writer.h> #include <torchaudio/csrc/ffmpeg/stream_writer/stream_writer.h>
#include <torchaudio/csrc/ffmpeg/stub.h>
#ifdef USE_CUDA #ifdef USE_CUDA
#include <c10/cuda/CUDAStream.h> #include <c10/cuda/CUDAStream.h>
#endif #endif
namespace torchaudio::io { namespace torchaudio {
namespace io {
namespace { namespace {
AVFormatContext* get_output_format_context( AVFormatContext* get_output_format_context(
...@@ -19,7 +19,7 @@ AVFormatContext* get_output_format_context( ...@@ -19,7 +19,7 @@ AVFormatContext* get_output_format_context(
} }
AVFormatContext* p = nullptr; AVFormatContext* p = nullptr;
int ret = FFMPEG avformat_alloc_output_context2( int ret = avformat_alloc_output_context2(
&p, nullptr, format ? format.value().c_str() : nullptr, dst.c_str()); &p, nullptr, format ? format.value().c_str() : nullptr, dst.c_str());
TORCH_CHECK( TORCH_CHECK(
ret >= 0, ret >= 0,
...@@ -208,14 +208,14 @@ void StreamWriter::add_video_frame_stream( ...@@ -208,14 +208,14 @@ void StreamWriter::add_video_frame_stream(
} }
void StreamWriter::set_metadata(const OptionDict& metadata) { void StreamWriter::set_metadata(const OptionDict& metadata) {
FFMPEG av_dict_free(&format_ctx->metadata); av_dict_free(&format_ctx->metadata);
for (auto const& [key, value] : metadata) { for (auto const& [key, value] : metadata) {
FFMPEG av_dict_set(&format_ctx->metadata, key.c_str(), value.c_str(), 0); av_dict_set(&format_ctx->metadata, key.c_str(), value.c_str(), 0);
} }
} }
void StreamWriter::dump_format(int64_t i) { void StreamWriter::dump_format(int64_t i) {
FFMPEG av_dump_format(format_ctx, (int)i, format_ctx->url, 1); av_dump_format(format_ctx, (int)i, format_ctx->url, 1);
} }
void StreamWriter::open(const c10::optional<OptionDict>& option) { void StreamWriter::open(const c10::optional<OptionDict>& option) {
...@@ -231,10 +231,10 @@ void StreamWriter::open(const c10::optional<OptionDict>& option) { ...@@ -231,10 +231,10 @@ void StreamWriter::open(const c10::optional<OptionDict>& option) {
AVDictionary* opt = get_option_dict(option); AVDictionary* opt = get_option_dict(option);
if (!(fmt->flags & AVFMT_NOFILE) && if (!(fmt->flags & AVFMT_NOFILE) &&
!(format_ctx->flags & AVFMT_FLAG_CUSTOM_IO)) { !(format_ctx->flags & AVFMT_FLAG_CUSTOM_IO)) {
ret = FFMPEG avio_open2( ret = avio_open2(
&format_ctx->pb, format_ctx->url, AVIO_FLAG_WRITE, nullptr, &opt); &format_ctx->pb, format_ctx->url, AVIO_FLAG_WRITE, nullptr, &opt);
if (ret < 0) { if (ret < 0) {
FFMPEG av_dict_free(&opt); av_dict_free(&opt);
TORCH_CHECK( TORCH_CHECK(
false, false,
"Failed to open dst: ", "Failed to open dst: ",
...@@ -245,7 +245,7 @@ void StreamWriter::open(const c10::optional<OptionDict>& option) { ...@@ -245,7 +245,7 @@ void StreamWriter::open(const c10::optional<OptionDict>& option) {
} }
} }
ret = FFMPEG avformat_write_header(format_ctx, &opt); ret = avformat_write_header(format_ctx, &opt);
clean_up_dict(opt); clean_up_dict(opt);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, ret >= 0,
...@@ -258,7 +258,7 @@ void StreamWriter::open(const c10::optional<OptionDict>& option) { ...@@ -258,7 +258,7 @@ void StreamWriter::open(const c10::optional<OptionDict>& option) {
} }
void StreamWriter::close() { void StreamWriter::close() {
int ret = FFMPEG av_write_trailer(format_ctx); int ret = av_write_trailer(format_ctx);
if (ret < 0) { if (ret < 0) {
LOG(WARNING) << "Failed to write trailer. (" << av_err2string(ret) << ")."; LOG(WARNING) << "Failed to write trailer. (" << av_err2string(ret) << ").";
} }
...@@ -269,7 +269,7 @@ void StreamWriter::close() { ...@@ -269,7 +269,7 @@ void StreamWriter::close() {
if (!(fmt->flags & AVFMT_NOFILE) && if (!(fmt->flags & AVFMT_NOFILE) &&
!(format_ctx->flags & AVFMT_FLAG_CUSTOM_IO)) { !(format_ctx->flags & AVFMT_FLAG_CUSTOM_IO)) {
// avio_closep can be only applied to AVIOContext opened by avio_open // avio_closep can be only applied to AVIOContext opened by avio_open
FFMPEG avio_closep(&(format_ctx->pb)); avio_closep(&(format_ctx->pb));
} }
is_open = false; is_open = false;
} }
...@@ -355,13 +355,12 @@ AVIOContext* get_io_context( ...@@ -355,13 +355,12 @@ AVIOContext* get_io_context(
int buffer_size, int buffer_size,
int (*write_packet)(void* opaque, uint8_t* buf, int buf_size), int (*write_packet)(void* opaque, uint8_t* buf, int buf_size),
int64_t (*seek)(void* opaque, int64_t offset, int whence)) { int64_t (*seek)(void* opaque, int64_t offset, int whence)) {
unsigned char* buffer = unsigned char* buffer = static_cast<unsigned char*>(av_malloc(buffer_size));
static_cast<unsigned char*>(FFMPEG av_malloc(buffer_size));
TORCH_CHECK(buffer, "Failed to allocate buffer."); TORCH_CHECK(buffer, "Failed to allocate buffer.");
AVIOContext* io_ctx = FFMPEG avio_alloc_context( AVIOContext* io_ctx = avio_alloc_context(
buffer, buffer_size, 1, opaque, nullptr, write_packet, seek); buffer, buffer_size, 1, opaque, nullptr, write_packet, seek);
if (!io_ctx) { if (!io_ctx) {
FFMPEG av_freep(&buffer); av_freep(&buffer);
TORCH_CHECK(false, "Failed to allocate AVIOContext."); TORCH_CHECK(false, "Failed to allocate AVIOContext.");
} }
return io_ctx; return io_ctx;
...@@ -385,4 +384,5 @@ StreamWriterCustomIO::StreamWriterCustomIO( ...@@ -385,4 +384,5 @@ StreamWriterCustomIO::StreamWriterCustomIO(
: CustomOutput(opaque, buffer_size, write_packet, seek), : CustomOutput(opaque, buffer_size, write_packet, seek),
StreamWriter(io_ctx, format) {} StreamWriter(io_ctx, format) {}
} // namespace torchaudio::io } // namespace io
} // namespace torchaudio
#include <torchaudio/csrc/ffmpeg/stream_writer/tensor_converter.h> #include <torchaudio/csrc/ffmpeg/stream_writer/tensor_converter.h>
#include <torchaudio/csrc/ffmpeg/stub.h>
#ifdef USE_CUDA #ifdef USE_CUDA
#include <c10/cuda/CUDAStream.h> #include <c10/cuda/CUDAStream.h>
#endif #endif
namespace torchaudio::io { namespace torchaudio::io {
namespace { namespace {
using InitFunc = TensorConverter::InitFunc; using InitFunc = TensorConverter::InitFunc;
...@@ -41,8 +41,8 @@ void convert_func_(const torch::Tensor& chunk, AVFrame* buffer) { ...@@ -41,8 +41,8 @@ void convert_func_(const torch::Tensor& chunk, AVFrame* buffer) {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(chunk.size(1) == buffer->channels); TORCH_INTERNAL_ASSERT_DEBUG_ONLY(chunk.size(1) == buffer->channels);
// https://ffmpeg.org/doxygen/4.1/muxing_8c_source.html#l00334 // https://ffmpeg.org/doxygen/4.1/muxing_8c_source.html#l00334
if (!FFMPEG av_frame_is_writable(buffer)) { if (!av_frame_is_writable(buffer)) {
int ret = FFMPEG av_frame_make_writable(buffer); int ret = av_frame_make_writable(buffer);
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
ret >= 0, "Failed to make frame writable: ", av_err2string(ret)); ret >= 0, "Failed to make frame writable: ", av_err2string(ret));
} }
...@@ -145,8 +145,8 @@ void write_interlaced_video( ...@@ -145,8 +145,8 @@ void write_interlaced_video(
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(3) == num_channels); TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(3) == num_channels);
// https://ffmpeg.org/doxygen/4.1/muxing_8c_source.html#l00472 // https://ffmpeg.org/doxygen/4.1/muxing_8c_source.html#l00472
if (!FFMPEG av_frame_is_writable(buffer)) { if (!av_frame_is_writable(buffer)) {
int ret = FFMPEG av_frame_make_writable(buffer); int ret = av_frame_make_writable(buffer);
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
ret >= 0, "Failed to make frame writable: ", av_err2string(ret)); ret >= 0, "Failed to make frame writable: ", av_err2string(ret));
} }
...@@ -187,7 +187,7 @@ void write_planar_video( ...@@ -187,7 +187,7 @@ void write_planar_video(
AVFrame* buffer, AVFrame* buffer,
int num_planes) { int num_planes) {
const auto num_colors = const auto num_colors =
FFMPEG av_pix_fmt_desc_get((AVPixelFormat)buffer->format)->nb_components; av_pix_fmt_desc_get((AVPixelFormat)buffer->format)->nb_components;
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.dim() == 4); TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.dim() == 4);
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(0) == 1); TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(0) == 1);
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(1) == num_colors); TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(1) == num_colors);
...@@ -195,8 +195,8 @@ void write_planar_video( ...@@ -195,8 +195,8 @@ void write_planar_video(
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(3), buffer->width); TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(3), buffer->width);
// https://ffmpeg.org/doxygen/4.1/muxing_8c_source.html#l00472 // https://ffmpeg.org/doxygen/4.1/muxing_8c_source.html#l00472
if (!FFMPEG av_frame_is_writable(buffer)) { if (!av_frame_is_writable(buffer)) {
int ret = FFMPEG av_frame_make_writable(buffer); int ret = av_frame_make_writable(buffer);
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
ret >= 0, "Failed to make frame writable: ", av_err2string(ret)); ret >= 0, "Failed to make frame writable: ", av_err2string(ret));
} }
...@@ -308,7 +308,7 @@ std::pair<InitFunc, ConvertFunc> get_video_func(AVFrame* buffer) { ...@@ -308,7 +308,7 @@ std::pair<InitFunc, ConvertFunc> get_video_func(AVFrame* buffer) {
TORCH_CHECK( TORCH_CHECK(
false, false,
"Unexpected pixel format for CUDA: ", "Unexpected pixel format for CUDA: ",
FFMPEG av_get_pix_fmt_name(sw_pix_fmt)); av_get_pix_fmt_name(sw_pix_fmt));
} }
} }
...@@ -317,7 +317,7 @@ std::pair<InitFunc, ConvertFunc> get_video_func(AVFrame* buffer) { ...@@ -317,7 +317,7 @@ std::pair<InitFunc, ConvertFunc> get_video_func(AVFrame* buffer) {
case AV_PIX_FMT_GRAY8: case AV_PIX_FMT_GRAY8:
case AV_PIX_FMT_RGB24: case AV_PIX_FMT_RGB24:
case AV_PIX_FMT_BGR24: { case AV_PIX_FMT_BGR24: {
int channels = FFMPEG av_pix_fmt_desc_get(pix_fmt)->nb_components; int channels = av_pix_fmt_desc_get(pix_fmt)->nb_components;
InitFunc init_func = [=](const torch::Tensor& t, AVFrame* f) { InitFunc init_func = [=](const torch::Tensor& t, AVFrame* f) {
validate_video_input(t, f, channels); validate_video_input(t, f, channels);
return init_interlaced(t); return init_interlaced(t);
...@@ -339,9 +339,7 @@ std::pair<InitFunc, ConvertFunc> get_video_func(AVFrame* buffer) { ...@@ -339,9 +339,7 @@ std::pair<InitFunc, ConvertFunc> get_video_func(AVFrame* buffer) {
} }
default: default:
TORCH_CHECK( TORCH_CHECK(
false, false, "Unexpected pixel format: ", av_get_pix_fmt_name(pix_fmt));
"Unexpected pixel format: ",
FFMPEG av_get_pix_fmt_name(pix_fmt));
} }
} }
...@@ -385,9 +383,7 @@ TensorConverter::TensorConverter(AVMediaType type, AVFrame* buf, int buf_size) ...@@ -385,9 +383,7 @@ TensorConverter::TensorConverter(AVMediaType type, AVFrame* buf, int buf_size)
break; break;
default: default:
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
false, false, "Unsupported media type: ", av_get_media_type_string(type));
"Unsupported media type: ",
FFMPEG av_get_media_type_string(type));
} }
} }
......
#ifdef DLOPEN_FFMPEG
#include <ATen/DynamicLibrary.h>
#include <c10/util/CallOnce.h>
#include <torchaudio/csrc/ffmpeg/stub.h>
extern "C" {
#include <libavcodec/version.h>
#include <libavdevice/version.h>
#include <libavfilter/version.h>
#include <libavformat/version.h>
#include <libavutil/version.h>
}
namespace torchaudio::io::detail {
namespace {
// Owns dlopen-ed handles to the five FFmpeg shared libraries and resolves
// every function pointer of FFmpegStub from them at construction time.
// The at::DynamicLibrary members are declared before `stub`, so the loaded
// libraries outlive the function pointers that point into them.
class StubImpl {
  at::DynamicLibrary libavutil;
  at::DynamicLibrary libavcodec;
  at::DynamicLibrary libavformat;
  at::DynamicLibrary libavdevice;
  at::DynamicLibrary libavfilter;

 public:
  // The struct that holds all the function pointers to be used.
  FFmpegStub stub{};

  // Each argument is the platform-specific file name of the corresponding
  // FFmpeg shared library (e.g. "libavutil.so.56", "avutil-56.dll").
  // NOTE(review): at::DynamicLibrary presumably raises if a library or a
  // symbol cannot be found — confirm against ATen's DynamicLibrary.
  StubImpl(
      const char* util,
      const char* codec,
      const char* format,
      const char* device,
      const char* filter)
      : libavutil(util),
        libavcodec(codec),
        libavformat(format),
        libavdevice(device),
        libavfilter(filter) {
// `set(X)` looks up the symbol named "X" in one library and stores it in the
// identically named member of `stub`. The macro is redefined per library
// below so each symbol name is written exactly once.
// --- symbols resolved from libavutil -------------------------------------
#define set(X) stub.X = (decltype(FFmpegStub::X))libavutil.sym(#X)
    set(av_buffer_ref);
    set(av_buffer_unref);
    set(av_d2q);
    set(av_dict_free);
    set(av_dict_get);
    set(av_dict_set);
    set(av_frame_alloc);
    set(av_frame_free);
    set(av_frame_get_buffer);
    set(av_frame_is_writable);
    set(av_frame_make_writable);
    set(av_frame_unref);
    set(av_freep);
    set(av_get_channel_layout_nb_channels);
    set(av_get_channel_name);
    set(av_get_default_channel_layout);
    set(av_get_media_type_string);
    set(av_get_pix_fmt);
    set(av_get_pix_fmt_name);
    set(av_get_sample_fmt);
    set(av_get_sample_fmt_name);
    set(av_get_time_base_q);
    set(av_hwdevice_ctx_create);
    set(av_hwframe_ctx_alloc);
    set(av_hwframe_ctx_init);
    set(av_hwframe_get_buffer);
    set(av_log_get_level);
    set(av_log_set_level);
    set(av_malloc);
    set(av_pix_fmt_desc_get);
    set(av_rescale_q);
    set(av_sample_fmt_is_planar);
    set(av_strdup);
    set(av_strerror);
    set(avutil_version);
#undef set
// --- symbols resolved from libavcodec ------------------------------------
#define set(X) stub.X = (decltype(FFmpegStub::X))libavcodec.sym(#X)
    set(av_codec_is_decoder);
    set(av_codec_is_encoder);
    set(av_codec_iterate);
    set(av_packet_alloc);
    set(av_packet_clone);
    set(av_packet_free);
    set(av_packet_ref);
    set(av_packet_rescale_ts);
    set(av_packet_unref);
    set(avcodec_alloc_context3);
    set(avcodec_configuration);
    set(avcodec_descriptor_get);
    set(avcodec_find_decoder);
    set(avcodec_find_decoder_by_name);
    set(avcodec_find_encoder);
    set(avcodec_find_encoder_by_name);
    set(avcodec_flush_buffers);
    set(avcodec_free_context);
    set(avcodec_get_hw_config);
    set(avcodec_get_name);
    set(avcodec_open2);
    set(avcodec_parameters_alloc);
    set(avcodec_parameters_copy);
    set(avcodec_parameters_free);
    set(avcodec_parameters_from_context);
    set(avcodec_parameters_to_context);
    set(avcodec_receive_frame);
    set(avcodec_receive_packet);
    set(avcodec_send_frame);
    set(avcodec_send_packet);
    set(avcodec_version);
#undef set
// --- symbols resolved from libavformat -----------------------------------
#define set(X) stub.X = (decltype(FFmpegStub::X))libavformat.sym(#X)
    set(av_demuxer_iterate);
    set(av_dump_format);
    set(av_find_best_stream);
    set(av_find_input_format);
    set(av_guess_frame_rate);
    set(av_interleaved_write_frame);
    set(av_muxer_iterate);
    set(av_read_frame);
    set(av_seek_frame);
    set(av_write_trailer);
    set(avio_alloc_context);
    set(avio_enum_protocols);
    set(avio_closep);
    set(avio_flush);
    set(avio_open2);
    set(avformat_alloc_context);
    set(avformat_alloc_output_context2);
    set(avformat_close_input);
    set(avformat_find_stream_info);
    set(avformat_free_context);
    set(avformat_new_stream);
    set(avformat_open_input);
    set(avformat_version);
    set(avformat_write_header);
#undef set
// --- symbols resolved from libavdevice -----------------------------------
#define set(X) stub.X = (decltype(FFmpegStub::X))libavdevice.sym(#X)
    set(avdevice_register_all);
    set(avdevice_version);
#undef set
// --- symbols resolved from libavfilter -----------------------------------
#define set(X) stub.X = (decltype(FFmpegStub::X))libavfilter.sym(#X)
    set(av_buffersink_get_frame);
    set(av_buffersrc_add_frame_flags);
    set(avfilter_get_by_name);
    set(avfilter_graph_alloc);
    set(avfilter_graph_config);
    set(avfilter_graph_create_filter);
    set(avfilter_graph_free);
    set(avfilter_graph_parse_ptr);
    set(avfilter_inout_alloc);
    set(avfilter_inout_free);
    set(avfilter_version);
#undef set
  }
};
// Lazily created singleton holding the dlopen-ed libraries and the resolved
// function pointers. Populated exactly once by _init_stub() (see
// ffmpeg_stub() below).
static std::unique_ptr<StubImpl> _stub;

// Constructs _stub with the platform-specific shared-library file names.
// The major version of each library name is baked in at compile time from
// the FFmpeg version headers (AV_STRINGIFY(LIB*_VERSION_MAJOR)), so the
// libraries found at runtime must match the major versions torchaudio was
// compiled against.
void _init_stub() {
#if defined(_WIN32)
  // Windows convention: "avutil-<major>.dll" etc.
  _stub = std::make_unique<StubImpl>(
      "avutil-" AV_STRINGIFY(LIBAVUTIL_VERSION_MAJOR) ".dll",
      "avcodec-" AV_STRINGIFY(LIBAVCODEC_VERSION_MAJOR) ".dll",
      "avformat-" AV_STRINGIFY(LIBAVFORMAT_VERSION_MAJOR) ".dll",
      "avdevice-" AV_STRINGIFY(LIBAVDEVICE_VERSION_MAJOR) ".dll",
      "avfilter-" AV_STRINGIFY(LIBAVFILTER_VERSION_MAJOR) ".dll");
#elif defined(__APPLE__)
  // macOS convention: "libavutil.<major>.dylib" etc.
  _stub = std::make_unique<StubImpl>(
      "libavutil." AV_STRINGIFY(LIBAVUTIL_VERSION_MAJOR) ".dylib",
      "libavcodec." AV_STRINGIFY(LIBAVCODEC_VERSION_MAJOR) ".dylib",
      "libavformat." AV_STRINGIFY(LIBAVFORMAT_VERSION_MAJOR) ".dylib",
      "libavdevice." AV_STRINGIFY(LIBAVDEVICE_VERSION_MAJOR) ".dylib",
      "libavfilter." AV_STRINGIFY(LIBAVFILTER_VERSION_MAJOR) ".dylib");
#else
  // Linux/ELF convention: "libavutil.so.<major>" etc.
  _stub = std::make_unique<StubImpl>(
      "libavutil.so." AV_STRINGIFY(LIBAVUTIL_VERSION_MAJOR),
      "libavcodec.so." AV_STRINGIFY(LIBAVCODEC_VERSION_MAJOR),
      "libavformat.so." AV_STRINGIFY(LIBAVFORMAT_VERSION_MAJOR),
      "libavdevice.so." AV_STRINGIFY(LIBAVDEVICE_VERSION_MAJOR),
      "libavfilter.so." AV_STRINGIFY(LIBAVFILTER_VERSION_MAJOR));
#endif
}
} // namespace
// Returns the process-wide FFmpeg function-pointer table, performing the
// one-time library loading and symbol resolution on the first call.
// c10::call_once makes the initialization thread-safe.
FFmpegStub& ffmpeg_stub() {
  static c10::once_flag flag;
  c10::call_once(flag, []() { _init_stub(); });
  return _stub->stub;
}
} // namespace torchaudio::io::detail
#endif
#pragma once
// Abstraction of the access to the FFmpeg libraries.
//
// Do not include this in header files.
// Include this header in implementation files and prepend
// all calls to libav functions with the FFMPEG macro.
//
// If DLOPEN_FFMPEG is not defined, the FFMPEG macro is empty.
// In this case, the FFmpeg libraries are linked at the time torchaudio is
// built.
//
// If DLOPEN_FFMPEG is defined, the FFMPEG macro becomes a function call that
// fetches a stub instance of the FFmpeg libraries.
// This function also initializes the function pointers by automatically
// dlopening all the required libraries.
//
#ifndef DLOPEN_FFMPEG
#define FFMPEG
#else
#define FFMPEG detail::ffmpeg_stub().
#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
namespace torchaudio::io::detail {
struct FFmpegStub;
// dlopen FFmpeg libraries and populate the methods of stub instance,
// then return the reference to the stub instance
FFmpegStub& ffmpeg_stub();
// Table of function pointers into the dlopen-ed FFmpeg libraries.
// Each member carries the name and signature of the corresponding libav*
// function; call sites invoke them through the FFMPEG macro
// (e.g. `FFMPEG av_frame_alloc()`). The pointers are populated when
// ffmpeg_stub() loads the libraries on first use, so the signatures below
// must stay in sync with the FFmpeg headers torchaudio is built against.
struct FFmpegStub {
  /////////////////////////////////////////////////////////////////////////////
  // libavutil
  /////////////////////////////////////////////////////////////////////////////
  AVBufferRef* (*av_buffer_ref)(const AVBufferRef*);
  void (*av_buffer_unref)(AVBufferRef**);
  AVRational (*av_d2q)(double, int) av_const;
  void (*av_dict_free)(AVDictionary**);
  AVDictionaryEntry* (*av_dict_get)(
      const AVDictionary*,
      const char*,
      const AVDictionaryEntry*,
      int);
  int (*av_dict_set)(AVDictionary**, const char*, const char*, int);
  AVFrame* (*av_frame_alloc)();
  void (*av_frame_free)(AVFrame**);
  int (*av_frame_get_buffer)(AVFrame*, int);
  int (*av_frame_is_writable)(AVFrame*);
  int (*av_frame_make_writable)(AVFrame*);
  void (*av_frame_unref)(AVFrame*);
  void (*av_freep)(void*);
  int (*av_get_channel_layout_nb_channels)(uint64_t);
  const char* (*av_get_channel_name)(uint64_t);
  int64_t (*av_get_default_channel_layout)(int);
  const char* (*av_get_media_type_string)(enum AVMediaType);
  enum AVPixelFormat (*av_get_pix_fmt)(const char*);
  const char* (*av_get_pix_fmt_name)(enum AVPixelFormat);
  enum AVSampleFormat (*av_get_sample_fmt)(const char*);
  const char* (*av_get_sample_fmt_name)(enum AVSampleFormat);
  AVRational (*av_get_time_base_q)();
  int (*av_hwdevice_ctx_create)(
      AVBufferRef**,
      enum AVHWDeviceType,
      const char*,
      AVDictionary*,
      int);
  AVBufferRef* (*av_hwframe_ctx_alloc)(AVBufferRef*);
  int (*av_hwframe_ctx_init)(AVBufferRef*);
  int (*av_hwframe_get_buffer)(AVBufferRef*, AVFrame*, int);
  int (*av_log_get_level)();
  void (*av_log_set_level)(int);
  void* (*av_malloc)(size_t);
  const AVPixFmtDescriptor* (*av_pix_fmt_desc_get)(enum AVPixelFormat);
  int64_t (*av_rescale_q)(int64_t, AVRational, AVRational) av_const;
  int (*av_sample_fmt_is_planar)(enum AVSampleFormat);
  char* (*av_strdup)(const char*);
  int (*av_strerror)(int, char*, size_t);
  unsigned (*avutil_version)();
  /////////////////////////////////////////////////////////////////////////////
  // libavcodec
  /////////////////////////////////////////////////////////////////////////////
  int (*av_codec_is_decoder)(const AVCodec*);
  int (*av_codec_is_encoder)(const AVCodec*);
  const AVCodec* (*av_codec_iterate)(void**);
  AVPacket* (*av_packet_alloc)();
  AVPacket* (*av_packet_clone)(const AVPacket*);
  void (*av_packet_free)(AVPacket**);
  int (*av_packet_ref)(AVPacket*, const AVPacket*);
  void (*av_packet_rescale_ts)(AVPacket*, AVRational, AVRational);
  void (*av_packet_unref)(AVPacket*);
  AVCodecContext* (*avcodec_alloc_context3)(const AVCodec*);
  const char* (*avcodec_configuration)();
  const AVCodecDescriptor* (*avcodec_descriptor_get)(enum AVCodecID);
  AVCodec* (*avcodec_find_decoder)(enum AVCodecID);
  AVCodec* (*avcodec_find_decoder_by_name)(const char*);
  AVCodec* (*avcodec_find_encoder)(enum AVCodecID);
  AVCodec* (*avcodec_find_encoder_by_name)(const char*);
  void (*avcodec_flush_buffers)(AVCodecContext*);
  void (*avcodec_free_context)(AVCodecContext**);
  const AVCodecHWConfig* (*avcodec_get_hw_config)(const AVCodec*, int);
  const char* (*avcodec_get_name)(enum AVCodecID);
  int (*avcodec_open2)(AVCodecContext*, const AVCodec*, AVDictionary**);
  AVCodecParameters* (*avcodec_parameters_alloc)();
  int (*avcodec_parameters_copy)(AVCodecParameters*, const AVCodecParameters*);
  void (*avcodec_parameters_free)(AVCodecParameters**);
  int (*avcodec_parameters_from_context)(
      AVCodecParameters*,
      const AVCodecContext*);
  int (*avcodec_parameters_to_context)(
      AVCodecContext*,
      const AVCodecParameters*);
  int (*avcodec_receive_frame)(AVCodecContext*, AVFrame*);
  int (*avcodec_receive_packet)(AVCodecContext*, AVPacket*);
  int (*avcodec_send_frame)(AVCodecContext*, const AVFrame*);
  int (*avcodec_send_packet)(AVCodecContext*, const AVPacket*);
  unsigned (*avcodec_version)();
  /////////////////////////////////////////////////////////////////////////////
  // libavformat
  /////////////////////////////////////////////////////////////////////////////
  const AVInputFormat* (*av_demuxer_iterate)(void**);
  void (*av_dump_format)(AVFormatContext*, int, const char*, int);
  int (*av_find_best_stream)(
      AVFormatContext*,
      enum AVMediaType,
      int,
      int,
      AVCodec**,
      int);
  AVInputFormat* (*av_find_input_format)(const char*);
  AVRational (*av_guess_frame_rate)(AVFormatContext*, AVStream*, AVFrame*);
  int (*av_interleaved_write_frame)(AVFormatContext*, AVPacket*);
  const AVOutputFormat* (*av_muxer_iterate)(void**);
  int (*av_read_frame)(AVFormatContext*, AVPacket*);
  int (*av_seek_frame)(AVFormatContext*, int, int64_t, int);
  int (*av_write_trailer)(AVFormatContext* s);
  AVIOContext* (*avio_alloc_context)(
      unsigned char*,
      int,
      int,
      void*,
      int (*)(void*, uint8_t*, int),
      int (*)(void*, uint8_t*, int),
      int64_t (*)(void*, int64_t, int));
  const char* (*avio_enum_protocols)(void**, int);
  int (*avio_closep)(AVIOContext**);
  void (*avio_flush)(AVIOContext*);
  int (*avio_open2)(
      AVIOContext**,
      const char*,
      int,
      const AVIOInterruptCB*,
      AVDictionary**);
  AVFormatContext* (*avformat_alloc_context)();
  int (*avformat_alloc_output_context2)(
      AVFormatContext**,
      AVOutputFormat*,
      const char*,
      const char*);
  void (*avformat_close_input)(AVFormatContext**);
  int (*avformat_find_stream_info)(AVFormatContext*, AVDictionary**);
  void (*avformat_free_context)(AVFormatContext*);
  AVStream* (*avformat_new_stream)(AVFormatContext*, const AVCodec*);
  // AVFORMAT_CONST absorbs the const-ness difference of AVInputFormat*
  // across FFmpeg versions.
  int (*avformat_open_input)(
      AVFormatContext**,
      const char*,
      AVFORMAT_CONST AVInputFormat*,
      AVDictionary**);
  unsigned (*avformat_version)();
  int (*avformat_write_header)(AVFormatContext*, AVDictionary**);
  /////////////////////////////////////////////////////////////////////////////
  // libavdevice
  /////////////////////////////////////////////////////////////////////////////
  void (*avdevice_register_all)();
  unsigned (*avdevice_version)();
  /////////////////////////////////////////////////////////////////////////////
  // libavfilter
  /////////////////////////////////////////////////////////////////////////////
  int (*av_buffersink_get_frame)(AVFilterContext*, AVFrame*);
  int (*av_buffersrc_add_frame_flags)(AVFilterContext*, AVFrame*, int);
  const AVFilter* (*avfilter_get_by_name)(const char*);
  AVFilterGraph* (*avfilter_graph_alloc)();
  int (*avfilter_graph_config)(AVFilterGraph*, void*);
  int (*avfilter_graph_create_filter)(
      AVFilterContext**,
      const AVFilter*,
      const char*,
      const char*,
      void*,
      AVFilterGraph*);
  void (*avfilter_graph_free)(AVFilterGraph**);
  int (*avfilter_graph_parse_ptr)(
      AVFilterGraph*,
      const char*,
      AVFilterInOut**,
      AVFilterInOut**,
      void*);
  AVFilterInOut* (*avfilter_inout_alloc)();
  void (*avfilter_inout_free)(AVFilterInOut**);
  unsigned (*avfilter_version)();
};
} // namespace torchaudio::io::detail
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment